150 changes: 75 additions & 75 deletions llvm/test/CodeGen/X86/stack-folding-int-avx512.ll


90 changes: 45 additions & 45 deletions llvm/test/CodeGen/X86/vec-libcalls.ll
@@ -40,7 +40,7 @@ define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: sin_v1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
%r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
@@ -52,11 +52,11 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -70,17 +70,17 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: addq $40, %rsp
@@ -94,23 +94,23 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: addq $40, %rsp
@@ -122,37 +122,37 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: sin_v5f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $88, %rsp
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $88, %rsp
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
ret <5 x float> %r
@@ -161,43 +161,43 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: sin_v6f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $88, %rsp
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinf
; CHECK-NEXT: callq sinf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $88, %rsp
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
ret <6 x float> %r
@@ -206,25 +206,25 @@ define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: sin_v3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $88, %rsp
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sin
; CHECK-NEXT: callq sin@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sin
; CHECK-NEXT: callq sin@PLT
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sin
; CHECK-NEXT: callq sin@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $88, %rsp
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
ret <3 x double> %r
@@ -233,7 +233,7 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
ret <2 x float> %r
@@ -253,11 +253,11 @@ define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq cosf
; CHECK-NEXT: callq cosf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq cosf
; CHECK-NEXT: callq cosf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -271,11 +271,11 @@ define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq expf
; CHECK-NEXT: callq expf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq expf
; CHECK-NEXT: callq expf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -289,11 +289,11 @@ define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: callq exp2f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: callq exp2f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -316,11 +316,11 @@ define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq logf
; CHECK-NEXT: callq logf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq logf
; CHECK-NEXT: callq logf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -334,11 +334,11 @@ define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq log10f
; CHECK-NEXT: callq log10f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq log10f
; CHECK-NEXT: callq log10f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -352,11 +352,11 @@ define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq log2f
; CHECK-NEXT: callq log2f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq log2f
; CHECK-NEXT: callq log2f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
@@ -386,8 +386,8 @@ define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: round_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vorps {{.*}}(%rip), %xmm1, %xmm1
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
; CHECK-NEXT: retq
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -224,8 +224,8 @@ define <2 x double> @f12(<2 x double> %a0, <4 x float> %a1) #0 {
define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-LABEL: f13:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $108, %esp
; SSE-X86-NEXT: .cfi_def_cfa_offset 112
; SSE-X86-NEXT: subl $100, %esp
; SSE-X86-NEXT: .cfi_def_cfa_offset 104
; SSE-X86-NEXT: movups %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -282,7 +282,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-X86-NEXT: addl $108, %esp
; SSE-X86-NEXT: addl $100, %esp
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
;
@@ -296,30 +296,30 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE-X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: callq fmaf@PLT
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: callq fmaf@PLT
; SSE-X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; SSE-X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: callq fmaf@PLT
; SSE-X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: callq fmaf@PLT
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
@@ -381,15 +381,15 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; SSE-X64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: callq fma@PLT
; SSE-X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: callq fma@PLT
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-X64-NEXT: movaps %xmm1, %xmm0
538 changes: 269 additions & 269 deletions llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll


230 changes: 115 additions & 115 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll


40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
@@ -515,7 +515,7 @@ define void @vf16(<64 x i16>* %in.vec, <16 x i16>* %out.vec0, <16 x i16>* %out.v
define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.vec1, <32 x i16>* %out.vec2, <32 x i16>* %out.vec3) nounwind {
; AVX2-SLOW-LABEL: vf32:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: subq $280, %rsp # imm = 0x118
; AVX2-SLOW-NEXT: subq $248, %rsp
; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm5
; AVX2-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm9
@@ -529,7 +529,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm0[1,2,3],xmm4[4],xmm0[5,6,7]
; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm9[0],xmm0[1,2,3],xmm9[4],xmm0[5,6,7]
; AVX2-SLOW-NEXT: vmovdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-SLOW-NEXT: vmovdqa %xmm9, (%rsp) # 16-byte Spill
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7]
; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm4, %xmm3
; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
@@ -570,7 +570,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7]
; AVX2-SLOW-NEXT: vmovdqa 224(%rdi), %xmm11
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm11[0],xmm0[1,2,3],xmm11[4],xmm0[5,6,7]
; AVX2-SLOW-NEXT: vmovdqa %xmm11, (%rsp) # 16-byte Spill
; AVX2-SLOW-NEXT: vmovdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm7
; AVX2-SLOW-NEXT: vmovdqa 208(%rdi), %xmm3
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7]
@@ -665,7 +665,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[0,1,2,0,4,5,6,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm13[0,1,2,0,4,5,6,7]
; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
; AVX2-SLOW-NEXT: vpshufd $231, (%rsp), %xmm6 # 16-byte Folded Reload
; AVX2-SLOW-NEXT: # xmm6 = mem[3,1,2,3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm9[3,1,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7]
@@ -675,7 +675,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[3,1,2,3]
; AVX2-SLOW-NEXT: vpshufd $231, (%rsp), %xmm9 # 16-byte Folded Reload
; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload
; AVX2-SLOW-NEXT: # xmm9 = mem[3,1,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[0,1,2,0,4,5,6,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm9[0,1,2,0,4,5,6,7]
@@ -753,17 +753,17 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-SLOW-NEXT: vmovaps %ymm2, (%rcx)
; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%r8)
; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%r8)
; AVX2-SLOW-NEXT: addq $280, %rsp # imm = 0x118
; AVX2-SLOW-NEXT: addq $248, %rsp
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: vf32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: subq $216, %rsp
; AVX2-FAST-ALL-NEXT: subq $200, %rsp
; AVX2-FAST-ALL-NEXT: vmovdqa 64(%rdi), %ymm5
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa 96(%rdi), %ymm6
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm6, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm6, (%rsp) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %xmm2
; AVX2-FAST-ALL-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa 16(%rdi), %xmm11
@@ -793,7 +793,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa 176(%rdi), %xmm13
; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm5 = xmm13[0],xmm8[1,2,3],xmm13[4],xmm8[5,6,7]
; AVX2-FAST-ALL-NEXT: vmovdqa %xmm13, (%rsp) # 16-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa 160(%rdi), %xmm0
; AVX2-FAST-ALL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm7 = xmm0[0],xmm8[1,2,3],xmm0[4],xmm8[5,6,7]
@@ -850,7 +850,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,2,3,1,3,5,7]
; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm6 # 32-byte Folded Reload
; AVX2-FAST-ALL-NEXT: vpermd (%rsp), %ymm1, %ymm6 # 32-byte Folded Reload
; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm14 # 32-byte Folded Reload
; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm6[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29]
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u>
@@ -869,13 +869,13 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1]
; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3]
; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm5[4,5,6,7]
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
; AVX2-FAST-ALL-NEXT: vpermd %ymm10, %ymm1, %ymm2
; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm10 # 32-byte Folded Reload
; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm5 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29]
; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm8 = ymm10[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3,4,5],ymm5[6,7]
; AVX2-FAST-ALL-NEXT: vpshufd $231, (%rsp), %xmm3 # 16-byte Folded Reload
; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
; AVX2-FAST-ALL-NEXT: # xmm3 = mem[3,1,2,3]
; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3]
; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm3[0,1,2,0,4,5,6,7]
@@ -924,17 +924,17 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, (%rdx)
; AVX2-FAST-ALL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, 32(%rcx)
; AVX2-FAST-ALL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-FAST-ALL-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload
; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, (%rcx)
; AVX2-FAST-ALL-NEXT: vmovdqa %ymm1, 32(%r8)
; AVX2-FAST-ALL-NEXT: vmovdqa %ymm0, (%r8)
; AVX2-FAST-ALL-NEXT: addq $216, %rsp
; AVX2-FAST-ALL-NEXT: addq $200, %rsp
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: vf32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: subq $280, %rsp # imm = 0x118
; AVX2-FAST-PERLANE-NEXT: subq $248, %rsp
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm6
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm1
@@ -991,7 +991,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovdqa 224(%rdi), %xmm13
; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm13, (%rsp) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm3, %xmm7
; AVX2-FAST-PERLANE-NEXT: vmovdqa 208(%rdi), %xmm3
; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7]
@@ -1061,7 +1061,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm1 = xmm12[3,1,2,3]
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm14[3,1,2,3]
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, (%rsp) # 16-byte Spill
; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[2,0,2,3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
@@ -1084,7 +1084,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm11[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[3,1,2,3]
; AVX2-FAST-PERLANE-NEXT: vpshufd $231, (%rsp), %xmm9 # 16-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # xmm9 = mem[3,1,2,3]
; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm8[0,1,2,0,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm9[0,1,2,0,4,5,6,7]
@@ -1119,7 +1119,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # xmm6 = mem[3,1,2,3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, (%rsp), %xmm7 # 16-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # xmm7 = mem[3,1,2,3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
@@ -1163,7 +1163,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, (%rcx)
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%r8)
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%r8)
; AVX2-FAST-PERLANE-NEXT: addq $280, %rsp # imm = 0x118
; AVX2-FAST-PERLANE-NEXT: addq $248, %rsp
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
%wide.vec = load <128 x i16>, <128 x i16>* %in.vec, align 32
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vzero-excess.ll
@@ -8,16 +8,16 @@ define <4 x float> @zeroupper_v4f32(<8 x float> *%x, <8 x float> %y) nounwind {
; CHECK-LABEL: zeroupper_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq the_unknown
; CHECK-NEXT: callq the_unknown@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vaddps (%rbx), %ymm0, %ymm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -34,12 +34,12 @@ define <4 x float> @zeroupper_v4f32(<8 x float> *%x, <8 x float> %y) nounwind {
define <8 x float> @zeroupper_v8f32(<8 x float> %x) nounwind {
; CHECK-LABEL: zeroupper_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq the_unknown
; CHECK-NEXT: callq the_unknown@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
call void @llvm.x86.avx.vzeroupper()
call void @the_unknown()
@@ -50,16 +50,16 @@ define <4 x float> @zeroall_v4f32(<8 x float> *%x, <8 x float> %y) nounwind {
; CHECK-LABEL: zeroall_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vzeroall
; CHECK-NEXT: callq the_unknown
; CHECK-NEXT: callq the_unknown@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vaddps (%rbx), %ymm0, %ymm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -76,12 +76,12 @@ define <8 x float> @zeroall_v8f32(<8 x float> %x) nounwind {
define <8 x float> @zeroall_v8f32(<8 x float> %x) nounwind {
; CHECK-LABEL: zeroall_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vzeroall
; CHECK-NEXT: callq the_unknown
; CHECK-NEXT: callq the_unknown@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
call void @llvm.x86.avx.vzeroall()
call void @the_unknown()