6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/vec_anyext.ll
@@ -211,7 +211,8 @@ define <4 x i8> @func_8_64(ptr %a, ptr %b) nounwind {
define <4 x i16> @const_16_32() nounwind {
; CHECK-LABEL: const_16_32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = <0,3,8,7,u,u,u,u>
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [0,3,8,7,0,3,8,7]
; CHECK-NEXT: # xmm0 = mem[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%G = trunc <4 x i32> <i32 0, i32 3, i32 8, i32 7> to <4 x i16>
ret <4 x i16> %G
@@ -220,7 +221,8 @@ define <4 x i16> @const_16_32() nounwind {
define <4 x i16> @const_16_64() nounwind {
; CHECK-LABEL: const_16_64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = <0,3,8,7,u,u,u,u>
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [0,3,8,7,0,3,8,7]
; CHECK-NEXT: # xmm0 = mem[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%G = trunc <4 x i64> <i64 0, i64 3, i64 8, i64 7> to <4 x i16>
ret <4 x i16> %G
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vec_fabs.ll
@@ -149,7 +149,7 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX-LABEL: fabs_v8f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
@@ -166,7 +166,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
;
; X64-AVX-LABEL: fabs_v8f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
@@ -188,7 +188,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X86-AVX-LABEL: fabs_v16f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
@@ -205,7 +205,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
;
; X64-AVX-LABEL: fabs_v16f32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -1912,7 +1912,8 @@ define <4 x i32> @fptosi_2f64_to_2i32_const() {
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [4294967295,1,4294967295,1]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: retq
%cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1970,7 +1971,8 @@ define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [2,4,2,4]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: retq
%cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
23 changes: 15 additions & 8 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -4786,7 +4786,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd (%rdi), %ymm2
; AVX1-NEXT: vmovapd 32(%rdi), %ymm3
; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1,1,1,1]
; AVX1-NEXT: vandpd %ymm4, %ymm3, %ymm5
; AVX1-NEXT: vmovaps (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
@@ -5640,7 +5640,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX1-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
@@ -5649,7 +5650,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX1-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovupd %xmm0, (%rdi)
@@ -5666,7 +5668,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX2-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: # xmm6 = mem[0,0]
; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
@@ -5675,7 +5678,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX2-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX2-NEXT: # xmm2 = mem[0,0]
; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovupd %xmm0, (%rdi)
@@ -5692,7 +5696,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512F-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT: # xmm6 = mem[0,0]
; AVX512F-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512F-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
@@ -5701,7 +5706,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX512F-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX512F-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX512F-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512F-NEXT: # xmm2 = mem[0,0]
; AVX512F-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vmovupd %xmm0, (%rdi)
@@ -5742,7 +5748,8 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX512DQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm1, %zmm1
; AVX512DQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512DQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512DQ-NEXT: # xmm2 = mem[0,0]
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512DQ-NEXT: vmovupd %xmm0, (%rdi)
74 changes: 31 additions & 43 deletions llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -431,7 +431,8 @@ define <2 x double> @constrained_vector_fmul_v2f64() #0 {
;
; AVX-LABEL: constrained_vector_fmul_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -493,7 +494,8 @@ define <3 x double> @constrained_vector_fmul_v3f64() #0 {
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
@@ -516,17 +518,11 @@ define <4 x double> @constrained_vector_fmul_v4f64() #0 {
; CHECK-NEXT: mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fmul_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fmul_v4f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
; AVX-LABEL: constrained_vector_fmul_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
<4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
@@ -568,7 +564,8 @@ define <2 x double> @constrained_vector_fadd_v2f64() #0 {
;
; AVX-LABEL: constrained_vector_fadd_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -631,7 +628,8 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 {
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
@@ -654,17 +652,11 @@ define <4 x double> @constrained_vector_fadd_v4f64() #0 {
; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fadd_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fadd_v4f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
; AVX-LABEL: constrained_vector_fadd_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
<4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
@@ -706,7 +698,8 @@ define <2 x double> @constrained_vector_fsub_v2f64() #0 {
;
; AVX-LABEL: constrained_vector_fsub_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -772,7 +765,8 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 {
; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
@@ -795,17 +789,11 @@ define <4 x double> @constrained_vector_fsub_v4f64() #0 {
; CHECK-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fsub_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fsub_v4f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX512-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
; AVX-LABEL: constrained_vector_fsub_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
<4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
@@ -4460,11 +4448,11 @@ define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 {
;
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
@@ -5010,13 +4998,13 @@ define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 {
;
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -22,7 +22,7 @@ declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [63,63,63,63]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63]
; AVX1-NEXT: vandnps %ymm3, %ymm2, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
@@ -123,7 +123,7 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm3 = [63,63,63,63]
; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63]
; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -22,7 +22,7 @@ declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)
define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [63,63,63,63]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63]
; AVX1-NEXT: vandps %ymm3, %ymm2, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
@@ -124,7 +124,7 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm3 = [63,63,63,63]
; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63]
; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; XOPAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
@@ -328,7 +328,8 @@ define void @load_i32_stride3_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm6 = <1,4,7,2,5,u,u,u>
; AVX2-FAST-NEXT: vpermps %ymm5, %ymm6, %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3,4],ymm4[5,6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <u,u,u,u,u,1,4,7>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm5 = [0,1,4,7,0,1,4,7]
; AVX2-FAST-NEXT: # ymm5 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
60 changes: 40 additions & 20 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
@@ -177,7 +177,8 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
;
; AVX2-ONLY-LABEL: load_i32_stride4_vf4:
; AVX2-ONLY: # %bb.0:
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm0 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm0 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm0 = mem[0,0]
; AVX2-ONLY-NEXT: vmovaps 32(%rdi), %ymm1
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm0, %ymm0
; AVX2-ONLY-NEXT: vmovaps (%rdi), %xmm2
@@ -187,16 +188,19 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm0 = xmm5[0,1],xmm0[2,3]
; AVX2-ONLY-NEXT: vmovaps 48(%rdi), %xmm5
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm6 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm7 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX2-ONLY-NEXT: vmovaps (%rdi), %ymm8
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm7, %ymm7
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm7 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm7, %ymm1
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm2 = xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm3 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm3 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm3 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm3, %ymm3
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3]
; AVX2-ONLY-NEXT: vmovaps %xmm0, (%rsi)
@@ -336,7 +340,8 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm3, %ymm5
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm3, %ymm3
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm5 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm5 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm5 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm5, %ymm6
; AVX2-ONLY-NEXT: vmovaps (%rdi), %xmm7
; AVX2-ONLY-NEXT: vmovaps 16(%rdi), %xmm8
@@ -350,15 +355,17 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm9[6,7]
; AVX2-ONLY-NEXT: vmovaps 48(%rdi), %xmm9
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm10 = xmm5[0],xmm9[0],xmm5[1],xmm9[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm11 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm11 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm11 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm0, %ymm11, %ymm11
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm10 = xmm11[0,1],xmm10[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm10[0,1,2,3],ymm6[4,5,6,7]
; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm10, %ymm11
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm11 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm11 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm11 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm11, %ymm4
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm7 = xmm7[2],xmm8[2],xmm7[3],xmm8[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3]
@@ -368,7 +375,8 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm7, %ymm1
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm2 = xmm5[2],xmm9[2],xmm5[3],xmm9[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm5 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm5 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm5 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm0, %ymm5, %ymm0
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
@@ -673,7 +681,8 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 144(%rdi), %xmm10
; AVX2-ONLY-NEXT: vmovaps 128(%rdi), %xmm11
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm6 = xmm11[0],xmm10[0],xmm11[1],xmm10[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm9 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm9 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm9 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm12, %ymm9, %ymm12
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm6 = xmm6[0,1],xmm12[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm6[0,1,2,3],ymm1[4,5,6,7]
@@ -695,7 +704,8 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 32(%rdi), %xmm15
; AVX2-ONLY-NEXT: vmovaps 48(%rdi), %xmm6
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm9 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm7 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm7, %ymm14
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm9 = xmm14[0,1],xmm9[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm9[0,1,2,3],ymm1[4,5,6,7]
@@ -716,7 +726,8 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpermps %ymm3, %ymm7, %ymm9
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm8 = ymm9[0,1,2,3,4,5],ymm8[6,7]
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm9 = xmm11[2],xmm10[2],xmm11[3],xmm10[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm10 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm10 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm10 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm10, %ymm11 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm9 = xmm9[0,1],xmm11[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm8 = ymm9[0,1,2,3],ymm8[4,5,6,7]
@@ -732,7 +743,8 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm9, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7]
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm5 = xmm15[2],xmm6[2],xmm15[3],xmm6[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm6 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm6 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm6 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm6, %ymm10 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm5 = xmm10[0,1],xmm5[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
@@ -1379,7 +1391,8 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 256(%rdi), %xmm2
; AVX2-ONLY-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm3 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm3 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm3 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm3, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
@@ -1440,7 +1453,8 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 288(%rdi), %xmm1
; AVX2-ONLY-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm6 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm6 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm6 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm6, %ymm2
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
@@ -1494,7 +1508,8 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; AVX2-ONLY-NEXT: vunpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3, %xmm13 # 16-byte Folded Reload
; AVX2-ONLY-NEXT: # xmm13 = xmm3[2],mem[2],xmm3[3],mem[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm3 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm3 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm3 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm3, %ymm10 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm10 = xmm13[0,1],xmm10[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm10 = ymm10[0,1,2,3],ymm11[4,5,6,7]
@@ -1532,7 +1547,8 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpermps %ymm14, %ymm10, %ymm1
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm1 = xmm12[2],xmm5[2],xmm12[3],xmm5[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm2 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm2 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm2 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm5 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm5[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
@@ -2855,7 +2871,8 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 128(%rdi), %xmm1
; AVX2-ONLY-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm7 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm9, %ymm7, %ymm2
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
@@ -2999,7 +3016,8 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps 160(%rdi), %xmm1
; AVX2-ONLY-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm7 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm7, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm10[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
@@ -3118,7 +3136,8 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX2-ONLY-NEXT: vunpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
; AVX2-ONLY-NEXT: # xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm2 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm2 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm2 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm7 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm7[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm10[4,5,6,7]
@@ -3206,7 +3225,8 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX2-ONLY-NEXT: vunpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm7 # 16-byte Folded Reload
; AVX2-ONLY-NEXT: # xmm7 = xmm1[2],mem[2],xmm1[3],mem[3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm1 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm1 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm1 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm10 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm7 = xmm10[0,1],xmm7[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm7[0,1,2,3],ymm2[4,5,6,7]
104 changes: 69 additions & 35 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll

Large diffs are not rendered by default.

30 changes: 20 additions & 10 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
@@ -100,7 +100,8 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm6 = xmm6[0],xmm4[1],xmm6[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[3]
; AVX2-ONLY-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,2,2,3]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm4 = <4,3,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm4 = [4,3,4,3]
; AVX2-ONLY-NEXT: # xmm4 = mem[0,0]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm4, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm1[0],ymm0[1],ymm1[2,3,4],ymm0[5],ymm1[6,7]
@@ -421,7 +422,8 @@ define void @load_i32_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm7 = xmm7[0,1],xmm9[2,3]
; AVX2-SLOW-NEXT: vbroadcastss 100(%rdi), %xmm9
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm9 = xmm8[0,1,2],xmm9[3]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} xmm10 = <4,3,u,u>
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm10 = [4,3,4,3]
; AVX2-SLOW-NEXT: # xmm10 = mem[0,0]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-SLOW-NEXT: vpermps %ymm11, %ymm10, %ymm10
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
@@ -457,7 +459,8 @@ define void @load_i32_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vpermps %ymm3, %ymm2, %ymm2
; AVX2-FAST-NEXT: vbroadcastss 84(%rdi), %xmm3
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = <1,0,7,u,u,u,u,u>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [1,0,7,0,1,0,7,0]
; AVX2-FAST-NEXT: # ymm3 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm1[0],ymm0[1],ymm1[2,3,4],ymm0[5],ymm1[6,7]
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm3, %ymm3
; AVX2-FAST-NEXT: vmovaps 80(%rdi), %xmm5
@@ -478,7 +481,8 @@ define void @load_i32_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm7 = xmm7[0,1],xmm9[2,3]
; AVX2-FAST-NEXT: vbroadcastss 100(%rdi), %xmm9
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm9 = xmm8[0,1,2],xmm9[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm10 = <4,3,u,u>
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm10 = [4,3,4,3]
; AVX2-FAST-NEXT: # xmm10 = mem[0,0]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-FAST-NEXT: vpermps %ymm11, %ymm10, %ymm10
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
@@ -535,7 +539,8 @@ define void @load_i32_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm7 = xmm7[0,1],xmm9[2,3]
; AVX2-FAST-PERLANE-NEXT: vbroadcastss 100(%rdi), %xmm9
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm9 = xmm8[0,1,2],xmm9[3]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} xmm10 = <4,3,u,u>
; AVX2-FAST-PERLANE-NEXT: vmovddup {{.*#+}} xmm10 = [4,3,4,3]
; AVX2-FAST-PERLANE-NEXT: # xmm10 = mem[0,0]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm11, %ymm10, %ymm10
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
@@ -4298,7 +4303,8 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vbroadcastss 100(%rdi), %xmm1
; AVX2-SLOW-NEXT: vmovaps 64(%rdi), %xmm0
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} xmm3 = <4,3,u,u>
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm3 = [4,3,4,3]
; AVX2-SLOW-NEXT: # xmm3 = mem[0,0]
; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-SLOW-NEXT: vblendps $15, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm2 # 32-byte Folded Reload
; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
@@ -5321,7 +5327,8 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vbroadcastss 100(%rdi), %xmm1
; AVX2-FAST-PERLANE-NEXT: vmovaps 64(%rdi), %xmm0
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} xmm3 = <4,3,u,u>
; AVX2-FAST-PERLANE-NEXT: vmovddup {{.*#+}} xmm3 = [4,3,4,3]
; AVX2-FAST-PERLANE-NEXT: # xmm3 = mem[0,0]
; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-FAST-PERLANE-NEXT: vblendps $15, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm2 # 32-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
@@ -9202,7 +9209,8 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vbroadcastss 100(%rdi), %xmm1
; AVX2-SLOW-NEXT: vmovaps 64(%rdi), %xmm0
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} xmm5 = <4,3,u,u>
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm5 = [4,3,4,3]
; AVX2-SLOW-NEXT: # xmm5 = mem[0,0]
; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-SLOW-NEXT: vblendps $15, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm2 # 32-byte Folded Reload
; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
@@ -10244,7 +10252,8 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vbroadcastss 100(%rdi), %xmm1
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %xmm0
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm5 = <4,3,u,u>
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm5 = [4,3,4,3]
; AVX2-FAST-NEXT: # xmm5 = mem[0,0]
; AVX2-FAST-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-FAST-NEXT: vblendps $240, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm2 # 32-byte Folded Reload
; AVX2-FAST-NEXT: # ymm2 = ymm2[0,1,2,3],mem[4,5,6,7]
@@ -11293,7 +11302,8 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vbroadcastss 100(%rdi), %xmm1
; AVX2-FAST-PERLANE-NEXT: vmovaps 64(%rdi), %xmm0
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} xmm5 = <4,3,u,u>
; AVX2-FAST-PERLANE-NEXT: vmovddup {{.*#+}} xmm5 = [4,3,4,3]
; AVX2-FAST-PERLANE-NEXT: # xmm5 = mem[0,0]
; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; AVX2-FAST-PERLANE-NEXT: vblendps $15, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2, %ymm2 # 32-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
@@ -136,7 +136,7 @@ define void @store_i32_stride3_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps (%rdi), %xmm0
; AVX2-FAST-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <u,3,7,u,u,u,u,u>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm1 = [7,3,7,3,7,3,7,3]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm1
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2],xmm2[3]
@@ -306,7 +308,8 @@ define void @store_i32_stride3_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm4[2],ymm0[3,4],ymm4[5],ymm0[6,7]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[1,1,2,2]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1,2],ymm4[3],ymm0[4,5],ymm4[6],ymm0[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = <5,u,u,6,u,u,7,u>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [5,0,7,6,5,0,7,6]
; AVX2-FAST-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
; AVX2-FAST-NEXT: vbroadcastsd 24(%rdi), %ymm4
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm4[2],ymm1[3,4],ymm4[5],ymm1[6,7]
@@ -244,7 +244,7 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3],ymm5[4,5],ymm4[6,7]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm3[0,1,2,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0],ymm5[1],ymm4[2,3,4,5],ymm5[6],ymm4[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <u,3,7,u,u,u,u,u>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm5 = [7,3,7,3,7,3,7,3]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-FAST-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
@@ -265,7 +265,8 @@ define void @store_i32_stride6_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm6
; AVX2-ONLY-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm7
; AVX2-ONLY-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm8
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm9 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm9 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm9 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm9, %ymm9
; AVX2-ONLY-NEXT: vbroadcastf128 {{.*#+}} ymm10 = [0,4,1,5,0,4,1,5]
; AVX2-ONLY-NEXT: # ymm10 = mem[0,1,0,1]
18 changes: 11 additions & 7 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
@@ -139,7 +139,8 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = <0,2,4,6,u,u,u,1>
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm4, %ymm2
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = <u,u,u,u,0,2,4,u>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [0,2,4,0,0,2,4,0]
; AVX2-FAST-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm4, %ymm0
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6],ymm2[7]
; AVX2-FAST-NEXT: vextractf128 $1, %ymm3, %xmm2
@@ -439,7 +440,8 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-SLOW-NEXT: vpermps %ymm4, %ymm10, %ymm10
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm10[0],ymm5[1,2],ymm10[3,4,5,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm9[3,4,5],ymm5[6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} xmm9 = <u,u,0,4>
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm9 = [0,4,0,4]
; AVX2-SLOW-NEXT: # xmm9 = mem[0,0]
; AVX2-SLOW-NEXT: vpermps %ymm6, %ymm9, %ymm6
; AVX2-SLOW-NEXT: vbroadcastf128 {{.*#+}} ymm9 = [0,4,0,1,0,4,0,1]
; AVX2-SLOW-NEXT: # ymm9 = mem[0,1,0,1]
@@ -476,14 +478,14 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm7
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm8 = ymm6[1,1,1,1,5,5,5,5]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm7 = ymm8[0,1,2,3,4],ymm7[5],ymm8[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm8 = <u,1,5,u,u,u,u,u>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [5,1,5,1,5,1,5,1]
; AVX2-FAST-NEXT: vpermps %ymm3, %ymm8, %ymm8
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm9 = [5,0,2,6,5,0,2,6]
; AVX2-FAST-NEXT: # ymm9 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm9, %ymm9
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm8 = ymm9[0],ymm8[1,2],ymm9[3,4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm7 = ymm8[0,1,2],ymm7[3,4,5],ymm8[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm8 = <u,u,u,u,u,3,7,u>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [7,3,7,3,7,3,7,3]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm8, %ymm8
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm9 = [2,6,0,3,2,6,0,3]
; AVX2-FAST-NEXT: # ymm9 = mem[0,1,0,1]
@@ -493,7 +495,8 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} ymm4 = ymm4[2],ymm5[2],ymm4[3],ymm5[3],ymm4[6],ymm5[6],ymm4[7],ymm5[7]
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm4 = ymm4[0,1,0,1,4,5,4,5]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm8[0,1],ymm4[2,3,4],ymm8[5,6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm5 = <u,u,0,4>
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm5 = [0,4,0,4]
; AVX2-FAST-NEXT: # xmm5 = mem[0,0]
; AVX2-FAST-NEXT: vpermps %ymm3, %ymm5, %ymm3
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm5 = [0,4,0,1,0,4,0,1]
; AVX2-FAST-NEXT: # ymm5 = mem[0,1,0,1]
@@ -504,7 +507,7 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vbroadcastss (%r10), %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6],ymm2[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = <u,3,7,u,u,u,u,u>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [7,3,7,3,7,3,7,3]
; AVX2-FAST-NEXT: vpermps %ymm6, %ymm3, %ymm3
; AVX2-FAST-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
@@ -548,7 +551,8 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm4, %ymm10, %ymm10
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm5 = ymm10[0],ymm5[1,2],ymm10[3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm9[3,4,5],ymm5[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} xmm9 = <u,u,0,4>
; AVX2-FAST-PERLANE-NEXT: vmovddup {{.*#+}} xmm9 = [0,4,0,4]
; AVX2-FAST-PERLANE-NEXT: # xmm9 = mem[0,0]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm6, %ymm9, %ymm6
; AVX2-FAST-PERLANE-NEXT: vbroadcastf128 {{.*#+}} ymm9 = [0,4,0,1,0,4,0,1]
; AVX2-FAST-PERLANE-NEXT: # ymm9 = mem[0,1,0,1]
12 changes: 8 additions & 4 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
@@ -263,7 +263,8 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm6, %ymm9
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm6, %ymm6
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm9[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm9 = <u,u,0,4>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm9 = [0,4,0,4]
; AVX2-ONLY-NEXT: # xmm9 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm9, %ymm9
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm10 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
@@ -272,7 +273,8 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm9, %ymm10
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm9, %ymm9
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm9 = ymm9[0,1,2,3,4,5],ymm10[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm10 = <1,5,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm10 = [1,5,1,5]
; AVX2-ONLY-NEXT: # xmm10 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm11 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm10 = xmm10[0,1],xmm11[2,3]
@@ -281,7 +283,8 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm10, %ymm11
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm11 = <u,u,2,6>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm11 = [2,6,2,6]
; AVX2-ONLY-NEXT: # xmm11 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm11, %ymm8
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1],xmm8[2,3]
@@ -290,7 +293,8 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm4, %ymm7
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm4, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm7[6,7]
; AVX2-ONLY-NEXT: vmovaps {{.*#+}} xmm5 = <3,7,u,u>
; AVX2-ONLY-NEXT: vmovddup {{.*#+}} xmm5 = [3,7,3,7]
; AVX2-ONLY-NEXT: # xmm5 = mem[0,0]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
@@ -255,7 +255,7 @@ define i64 @test_v16i64_v16i8(<16 x i64> %a0) {
;
; AVX1-LABEL: test_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1,1,1,1]
; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
@@ -577,7 +577,7 @@ define i32 @test_v16i32_v16i8(<16 x i32> %a0) {
;
; AVX1-SLOW-LABEL: test_v16i32_v16i8:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-SLOW-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-SLOW-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-SLOW-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -595,7 +595,7 @@ define i32 @test_v16i32_v16i8(<16 x i32> %a0) {
;
; AVX1-FAST-LABEL: test_v16i32_v16i8:
; AVX1-FAST: # %bb.0:
; AVX1-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-FAST-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-FAST-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-FAST-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-FAST-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -694,7 +694,7 @@ define i32 @test_v32i32_v32i8(<32 x i32> %a0) {
;
; AVX1-SLOW-LABEL: test_v32i32_v32i8:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
; AVX1-SLOW-NEXT: vbroadcastss {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
; AVX1-SLOW-NEXT: vandps %ymm4, %ymm0, %ymm0
; AVX1-SLOW-NEXT: vandps %ymm4, %ymm2, %ymm2
; AVX1-SLOW-NEXT: vandps %ymm4, %ymm1, %ymm1
@@ -720,7 +720,7 @@ define i32 @test_v32i32_v32i8(<32 x i32> %a0) {
;
; AVX1-FAST-LABEL: test_v32i32_v32i8:
; AVX1-FAST: # %bb.0:
; AVX1-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
; AVX1-FAST-NEXT: vbroadcastss {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
; AVX1-FAST-NEXT: vandps %ymm4, %ymm0, %ymm0
; AVX1-FAST-NEXT: vandps %ymm4, %ymm2, %ymm2
; AVX1-FAST-NEXT: vandps %ymm4, %ymm1, %ymm1
@@ -1222,7 +1222,7 @@ define i16 @test_v64i16_v64i8(<64 x i16> %a0) {
;
; AVX1-LABEL: test_v64i16_v64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -633,7 +633,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
;
; AVX1-LABEL: trunc_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2211,7 +2211,7 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
@@ -2247,7 +2247,7 @@ define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_
define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -903,7 +903,8 @@ define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8f32_c348cda0:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,3,4,0,0,3,4,0]
; AVX2-FAST-ALL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
; AVX2-FAST-ALL-NEXT: # ymm2 = mem[0,1,0,1]
@@ -953,7 +954,7 @@ define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8f32_f511235a:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7,2,7,2,7,2,7,2]
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -300,7 +300,8 @@ define <8 x float> @expand15(<4 x float> %a) {
; AVX512-FAST-LABEL: expand15:
; AVX512-FAST: # %bb.0:
; AVX512-FAST-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <u,u,0,u,1,u,u,u>
; AVX512-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [1,0,0,0,1,0,0,0]
; AVX512-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512-FAST-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3],ymm0[4],mem[5,6,7]
; AVX512-FAST-NEXT: ret{{[l|q]}}
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -501,7 +501,8 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm4 = [0,0,3,0,10,0,1,0]
; X86-AVX512-NEXT: vpermi2pd %zmm0, %zmm3, %zmm4
; X86-AVX512-NEXT: vmovapd %ymm4, (%ecx)
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u>
; X86-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,0,11,0,3,0,11,0]
; X86-AVX512-NEXT: # ymm3 = mem[0,1,0,1]
; X86-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0]
; X86-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0
@@ -568,7 +569,8 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1]
; X64-AVX512-NEXT: vpermi2pd %zmm0, %zmm4, %zmm3
; X64-AVX512-NEXT: vmovapd %ymm3, (%rsi)
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,11,u,u>
; X64-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,11,3,11]
; X64-AVX512-NEXT: # ymm3 = mem[0,1,0,1]
; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3]
; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0
49 changes: 17 additions & 32 deletions llvm/test/CodeGen/X86/vector-shuffle-combining.ll
Expand Up @@ -2842,7 +2842,8 @@ define <4 x float> @PR30264(<4 x float> %x) {
;
; AVX-LABEL: PR30264:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = <u,u,4.0E+0,1.0E+0>
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.0E+0,1.0E+0,4.0E+0,1.0E+0]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.0, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
@@ -3471,37 +3472,21 @@ define void @SpinningCube() {
; SSE41-NEXT: movaps %xmm2, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: SpinningCube:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <0.0E+0,0.0E+0,-2.0E+0,u>
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX1-NEXT: vaddps %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovaps %xmm2, (%rax)
; AVX1-NEXT: vbroadcastss (%rax), %xmm2
; AVX1-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovaps %xmm0, (%rax)
; AVX1-NEXT: retq
;
; AVX2-LABEL: SpinningCube:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vmovaps {{.*#+}} xmm1 = <0.0E+0,0.0E+0,-2.0E+0,u>
; AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX2-NEXT: vaddps %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vmovaps %xmm2, (%rax)
; AVX2-NEXT: vbroadcastss (%rax), %xmm2
; AVX2-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovaps %xmm0, (%rax)
; AVX2-NEXT: retq
; AVX-LABEL: SpinningCube:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = <0.0E+0,0.0E+0,-2.0E+0,u>
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX-NEXT: vaddps %xmm2, %xmm3, %xmm2
; AVX-NEXT: vmovaps %xmm2, (%rax)
; AVX-NEXT: vbroadcastss (%rax), %xmm2
; AVX-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovaps %xmm0, (%rax)
; AVX-NEXT: retq
entry:
store float 1.000000e+00, ptr undef, align 4
%0 = load float, ptr undef, align 4
70 changes: 38 additions & 32 deletions llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -528,7 +528,7 @@ define <8 x i16> @trunc_add_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_add_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -628,7 +628,7 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_add_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -696,7 +696,7 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_add_const_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -1276,7 +1276,7 @@ define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -1376,7 +1376,7 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -1444,7 +1444,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -1686,7 +1686,7 @@ define <8 x i16> @trunc_mul_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
;
; AVX1-LABEL: trunc_mul_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -2202,7 +2202,7 @@ define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_mul_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -2567,7 +2567,8 @@ define <4 x i32> @trunc_and_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-FAST-ALL-LABEL: trunc_and_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -2617,7 +2618,7 @@ define <8 x i16> @trunc_and_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -2730,7 +2731,7 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX1-NEXT: vandps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vandps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -2806,7 +2807,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -2928,7 +2929,8 @@ define <4 x i32> @trunc_and_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
;
; AVX2-FAST-ALL-LABEL: trunc_and_const_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <u,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -2973,7 +2975,7 @@ define <8 x i16> @trunc_and_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_and_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3073,7 +3075,7 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_and_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -3141,7 +3143,7 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_and_const_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3264,7 +3266,8 @@ define <4 x i32> @trunc_xor_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-FAST-ALL-LABEL: trunc_xor_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -3314,7 +3317,7 @@ define <8 x i16> @trunc_xor_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3427,7 +3430,7 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vxorps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -3503,7 +3506,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3625,7 +3628,8 @@ define <4 x i32> @trunc_xor_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
;
; AVX2-FAST-ALL-LABEL: trunc_xor_const_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -3670,7 +3674,7 @@ define <8 x i16> @trunc_xor_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_xor_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3770,7 +3774,7 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -3838,7 +3842,7 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -3961,7 +3965,8 @@ define <4 x i32> @trunc_or_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-FAST-ALL-LABEL: trunc_or_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -4011,7 +4016,7 @@ define <8 x i16> @trunc_or_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -4124,7 +4129,7 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind
; AVX1-NEXT: vorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vorps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -4200,7 +4205,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -4322,7 +4327,8 @@ define <4 x i32> @trunc_or_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
;
; AVX2-FAST-ALL-LABEL: trunc_or_const_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -4367,7 +4373,7 @@ define <8 x i16> @trunc_or_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_or_const_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -4467,7 +4473,7 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_or_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT: vpackusdw %xmm5, %xmm3, %xmm3
@@ -4535,7 +4541,7 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_or_const_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -514,7 +514,8 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
63 changes: 42 additions & 21 deletions llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -69,7 +69,8 @@ define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) {
;
; AVX-LABEL: trunc_usat_v2i64_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -166,7 +167,8 @@ define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) {
;
; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -307,7 +309,8 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [4294967295,429496729]
; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [4294967295,4294967295]
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [4294967295,4294967295]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
@@ -334,7 +337,8 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,429496729]
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
@@ -582,7 +586,8 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) {
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovapd {{.*#+}} xmm7 = [4294967295,4294967295]
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [4294967295,4294967295]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm7, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
@@ -728,7 +733,8 @@ define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -739,7 +745,8 @@ define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: # xmm1 = mem[0,0]
; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -750,7 +757,8 @@ define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: # xmm1 = mem[0,0]
; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -848,7 +856,8 @@ define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) {
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -860,7 +869,8 @@ define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) {
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: # xmm1 = mem[0,0]
; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -872,7 +882,8 @@ define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) {
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: # xmm1 = mem[0,0]
; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1021,7 +1032,8 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [65535,65535]
; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
; AVX1-NEXT: # xmm5 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
@@ -1187,7 +1199,8 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) {
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [65535,65535]
; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
; AVX1-NEXT: # xmm5 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
@@ -1447,7 +1460,8 @@ define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) {
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovapd {{.*#+}} xmm7 = [65535,65535]
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [65535,65535]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
@@ -1993,7 +2007,8 @@ define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) {
;
; AVX-LABEL: trunc_usat_v2i64_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -2093,7 +2108,8 @@ define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) {
;
; AVX-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -2241,7 +2257,8 @@ define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [255,255]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [255,255]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm4, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm1
@@ -2411,7 +2428,8 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [255,255]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [255,255]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm4, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm1
@@ -2663,7 +2681,8 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) {
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovapd {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
@@ -2904,7 +2923,8 @@ define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) {
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovapd {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
@@ -3295,7 +3315,8 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) {
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovapd {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/vector-trunc.ll
@@ -201,7 +201,7 @@ define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -279,7 +279,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -671,7 +671,7 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
;
; AVX1-LABEL: trunc16i32_16i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
@@ -897,7 +897,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
@@ -1295,7 +1295,7 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
;
; AVX1-LABEL: trunc32i16_32i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -1733,7 +1733,7 @@ define <32 x i8> @trunc2x16i16_32i8(<16 x i16> %a, <16 x i16> %b) {
;
; AVX1-LABEL: trunc2x16i16_32i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -2146,7 +2146,7 @@ define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, ptr %p) a
;
; AVX1-LABEL: store_merge_split:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vselect-avx.ll
@@ -47,7 +47,7 @@ define void @test2(ptr %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq (%rdi,%rsi,8), %rax
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
; AVX1-NEXT: vblendvpd %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vmovupd %ymm0, (%rax)
; AVX1-NEXT: vzeroupper
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/vselect-zero.ll
@@ -125,7 +125,8 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
; AVX-LABEL: fsel_nonzero_false_val:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
;
@@ -179,7 +180,8 @@ define double @fsel_nonzero_constants(double %x, double %y) {
; AVX-LABEL: fsel_nonzero_constants:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX-NEXT: retq
;
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/X86/win_cst_pool.ll
@@ -65,16 +65,14 @@ define <8 x i16> @vec2() {
define <4 x float> @undef1() {
ret <4 x float> <float 1.0, float 1.0, float undef, float undef>

; CHECK: .globl __xmm@00000000000000003f8000003f800000
; CHECK-NEXT: .section .rdata,"dr",discard,__xmm@00000000000000003f8000003f800000
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: __xmm@00000000000000003f8000003f800000:
; CHECK-NEXT: .long 0x3f800000 # float 1
; CHECK: .globl __real@3f800000
; CHECK-NEXT: .section .rdata,"dr",discard,__real@3f800000
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: __real@3f800000:
; CHECK-NEXT: .long 0x3f800000 # float 1
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .text
; CHECK: undef1:
; CHECK: movaps __xmm@00000000000000003f8000003f800000(%rip), %xmm0
; CHECK: vbroadcastss __real@3f800000(%rip), %xmm0
; CHECK-NEXT: ret
}
