54 changes: 27 additions & 27 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -77,13 +77,13 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm1
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm0
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -132,13 +132,13 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -187,13 +187,13 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -226,7 +226,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT: vcomisd %xmm1, %xmm2
; AVX-32-NEXT: vmovapd %xmm1, %xmm3
@@ -244,7 +244,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vmovapd %xmm1, %xmm4
; AVX-32-NEXT: jae .LBB1_4
@@ -320,7 +320,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
; AVX-64-NEXT: xorq %rcx, %rax
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm4
; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB1_4
@@ -349,7 +349,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
; AVX-64-NEXT: jb .LBB1_8
; AVX-64-NEXT: # %bb.7:
@@ -378,7 +378,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: subl $40, %esp
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm3
@@ -394,7 +394,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512F-32-NEXT: setae %cl
@@ -446,13 +446,13 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -471,7 +471,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: subl $40, %esp
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
@@ -487,7 +487,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %cl
@@ -539,13 +539,13 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -610,7 +610,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-64-NEXT: vcvttss2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm1
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-64-NEXT: vcvttss2si %xmm2, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
@@ -664,7 +664,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
@@ -718,7 +718,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
@@ -790,7 +790,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vcomiss %xmm1, %xmm2
; AVX-32-NEXT: vmovaps %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB3_6
@@ -851,7 +851,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
; AVX-64-NEXT: xorq %rcx, %rax
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-64-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-64-NEXT: vcomiss %xmm1, %xmm4
; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB3_4
@@ -917,7 +917,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, (%esp)
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: xorl %edx, %edx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setae %dl
@@ -977,7 +977,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
@@ -1010,7 +1010,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, (%esp)
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %dl
@@ -1070,7 +1070,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -95,28 +95,28 @@ define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
@@ -150,7 +150,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %edi, -16
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
@@ -169,7 +169,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setae %al
@@ -186,7 +186,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setae %al
@@ -202,7 +202,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %ebx, %ebx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %bl
@@ -284,28 +284,28 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
@@ -388,7 +388,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
@@ -402,7 +402,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
@@ -452,7 +452,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT: vmovss %xmm3, (%esp)
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setae %al
@@ -486,7 +486,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %cl
@@ -577,7 +577,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
@@ -591,7 +591,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/X86/vec_fp_to_int.ll

Large diffs are not rendered by default.

52 changes: 26 additions & 26 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -3084,7 +3084,7 @@ define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: callq __truncdfhf2@PLT
; AVX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
@@ -3117,7 +3117,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: subq $88, %rsp
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3129,7 +3129,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3148,7 +3148,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: subq $88, %rsp
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
Expand All @@ -3160,7 +3160,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3249,7 +3249,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: subq $88, %rsp
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3261,7 +3261,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3280,7 +3280,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: subq $88, %rsp
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3292,7 +3292,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3382,7 +3382,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: subq $88, %rsp
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3394,7 +3394,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3413,7 +3413,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: subq $88, %rsp
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3425,7 +3425,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3736,7 +3736,7 @@ define void @store_cvt_2f64_to_2i16(<2 x double> %a0, ptr %a1) nounwind {
; AVX-NEXT: subq $32, %rsp
; AVX-NEXT: movq %rdi, %rbx
; AVX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: callq __truncdfhf2@PLT
; AVX-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
@@ -3796,14 +3796,14 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, ptr %a1) nounwind {
; AVX1-NEXT: subq $80, %rsp
; AVX1-NEXT: movq %rdi, %rbx
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3831,14 +3831,14 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, ptr %a1) nounwind {
; AVX2-NEXT: subq $80, %rsp
; AVX2-NEXT: movq %rdi, %rbx
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3940,7 +3940,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, ptr %a1) nounwind {
; AVX1-NEXT: subq $80, %rsp
; AVX1-NEXT: movq %rdi, %rbx
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3952,7 +3952,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, ptr %a1) nounwind {
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3975,7 +3975,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, ptr %a1) nounwind {
; AVX2-NEXT: subq $80, %rsp
; AVX2-NEXT: movq %rdi, %rbx
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -3987,7 +3987,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, ptr %a1) nounwind {
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -4090,7 +4090,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, ptr %a1) nounwind {
; AVX1-NEXT: subq $80, %rsp
; AVX1-NEXT: movq %rdi, %rbx
; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -4102,7 +4102,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, ptr %a1) nounwind {
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2@PLT
; AVX1-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -4125,7 +4125,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, ptr %a1) nounwind {
; AVX2-NEXT: subq $80, %rsp
; AVX2-NEXT: movq %rdi, %rbx
; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -4137,7 +4137,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, ptr %a1) nounwind {
; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2@PLT
; AVX2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-interleave.ll
@@ -582,9 +582,9 @@ define void @splat2_i64(ptr %s, ptr %d) {
; AVX1-LABEL: splat2_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 $51, (%rdi), %ymm0, %ymm0 # ymm0 = mem[2,3,2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
; AVX1-NEXT: vbroadcastf128 (%rdi), %ymm1 # ymm1 = mem[0,1,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,3]
; AVX1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[0,0,3,3]
; AVX1-NEXT: vmovupd %ymm0, 32(%rsi)
; AVX1-NEXT: vmovupd %ymm1, (%rsi)
; AVX1-NEXT: vzeroupper
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
@@ -233,7 +233,7 @@ define void @load_i32_stride5_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm7, %xmm7
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm7 = xmm7[0,1],mem[2],xmm7[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm7 = xmm7[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm7 = xmm7[1,0]
; AVX1-ONLY-NEXT: vinsertps {{.*#+}} xmm7 = xmm7[0,1,2],xmm6[1]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,0],mem[1,3]
@@ -443,7 +443,7 @@ define void @load_i32_stride5_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm11 = ymm4[0,1,2,3],ymm3[4,5],ymm4[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm11, %xmm11
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm11 = xmm11[0,1],mem[2],xmm11[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm11 = xmm11[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm11 = xmm11[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm8 = ymm11[0,1,2],ymm8[3,4,5,6,7]
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm11 = ymm0[2,3,0,1]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm12 = ymm0[1,0],ymm11[0,0],ymm0[5,4],ymm11[4,4]
@@ -886,7 +886,7 @@ define void @load_i32_stride5_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm7[0,1,2,3],ymm13[4,5],ymm7[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1],mem[2],xmm3[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm1[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 128(%rdi), %ymm1
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3,0,1]
@@ -900,7 +900,7 @@ define void @load_i32_stride5_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm12[0,1,2,3],ymm7[4,5],ymm12[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1],mem[2],xmm3[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm13 = ymm3[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 288(%rdi), %ymm0
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,0,1]
@@ -1852,7 +1852,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm2 = ymm12[0,1,2,3],mem[4,5],ymm12[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2],xmm2[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 288(%rdi), %ymm5
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm5[2,3,0,1]
@@ -1869,7 +1869,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm2 = mem[0,1,2,3],ymm1[4,5],mem[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2],xmm2[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 608(%rdi), %ymm6
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm12 = ymm6[2,3,0,1]
@@ -1883,7 +1883,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm4 = ymm15[0,1,2,3],mem[4,5],ymm15[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm4, %xmm4
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0,1],mem[2],xmm4[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm4 = xmm4[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 128(%rdi), %ymm9
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm13 = ymm9[2,3,0,1]
@@ -1899,7 +1899,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm4 = ymm1[0,1,2,3],mem[4,5],ymm1[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm4, %xmm4
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0,1],mem[2],xmm4[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm4 = xmm4[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 448(%rdi), %ymm4
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm4[2,3,0,1]
@@ -3865,7 +3865,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm3 = mem[0,1,2,3],ymm1[4,5],mem[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1],mem[2],xmm3[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm3[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 288(%rdi), %ymm1
; AVX1-ONLY-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3884,7 +3884,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm1 = mem[0,1,2,3],ymm1[4,5],mem[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],mem[2],xmm1[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 608(%rdi), %ymm2
; AVX1-ONLY-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3903,7 +3903,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm1 = ymm1[0,1,2,3],mem[4,5],ymm1[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],mem[2],xmm1[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 928(%rdi), %ymm2
; AVX1-ONLY-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3922,7 +3922,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm1 = mem[0,1,2,3],ymm1[4,5],mem[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],mem[2],xmm1[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 1248(%rdi), %ymm1
; AVX1-ONLY-NEXT: vmovups %ymm1, (%rsp) # 32-byte Spill
@@ -3940,7 +3940,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm1 = mem[0,1,2,3],ymm1[4,5],mem[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],mem[2],xmm1[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 1088(%rdi), %ymm2
; AVX1-ONLY-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3959,7 +3959,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm1 = ymm1[0,1,2,3],mem[4,5],ymm1[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],mem[2],xmm1[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 128(%rdi), %ymm3
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm3[2,3,0,1]
@@ -3975,7 +3975,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm2 = ymm0[0,1,2,3],mem[4,5],ymm0[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2],xmm2[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm1[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 448(%rdi), %ymm6
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm12 = ymm6[2,3,0,1]
@@ -3991,7 +3991,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX1-ONLY-NEXT: # ymm5 = ymm0[0,1,2,3],mem[4,5],ymm0[6,7]
; AVX1-ONLY-NEXT: vextractf128 $1, %ymm5, %xmm5
; AVX1-ONLY-NEXT: vblendps {{.*#+}} xmm5 = xmm5[0,1],mem[2],xmm5[3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} xmm5 = xmm5[1,0]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm5[0,1,2],ymm2[3,4,5,6,7]
; AVX1-ONLY-NEXT: vmovaps 768(%rdi), %ymm8
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm8[2,3,0,1]
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll

Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll
@@ -208,7 +208,7 @@ define void @store_i32_stride4_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,1,3,2,4,5,7,6]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2],ymm2[3],ymm5[4,5],ymm2[6],ymm5[7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,2,3,7,6,6,7]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm3 = ymm4[1,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1,0,3,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm3[0],ymm0[1],ymm3[2,3],ymm0[4],ymm3[5,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
; AVX1-ONLY-NEXT: vmovaps %ymm0, 32(%r8)
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
@@ -168,7 +168,7 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpermilps {{.*#+}} ymm5 = ymm5[u,u,1,0,u,u,u,6]
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-ONLY-NEXT: vunpcklps {{.*#+}} ymm7 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm7 = ymm7[0,0,3,3]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm7 = ymm7[0,0,3,3]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm5 = ymm7[0,1],ymm5[2,3],ymm7[4,5,6],ymm5[7]
; AVX1-ONLY-NEXT: vbroadcastf128 {{.*#+}} ymm7 = mem[0,1,0,1]
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm0
@@ -444,7 +444,7 @@ define void @store_i32_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm3 = ymm3[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4],ymm3[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm3 = ymm4[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm2 = ymm2[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4,5,6],ymm3[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4,5],ymm2[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7]
@@ -962,14 +962,14 @@ define void @store_i32_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm1 = ymm3[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm3 = ymm9[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm2 = ymm6[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm2 = ymm6[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4,5,6],ymm3[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4,5],ymm2[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2],ymm12[3],ymm1[4,5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm2 = ymm15[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm5[4],ymm2[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm3 = ymm14[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm9 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm9 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm9[0,1],ymm3[2],ymm9[3,4,5,6],ymm3[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3],ymm2[4,5],ymm3[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3],ymm2[4,5,6,7]
@@ -2131,7 +2131,7 @@ define void @store_i32_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: # ymm1 = ymm1[0,1,2,3],mem[4],ymm1[5,6,7]
; AVX1-ONLY-NEXT: vpermilps $52, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; AVX1-ONLY-NEXT: # ymm0 = mem[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm13 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm13 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm13[0,1],ymm0[2],ymm13[3,4,5,6],ymm0[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-ONLY-NEXT: vblendps $8, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
@@ -2150,14 +2150,14 @@ define void @store_i32_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm6 = ymm6[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm7[4],ymm6[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm7 = ymm9[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm9 = ymm11[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm9 = ymm11[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm9[0,1],ymm7[2],ymm9[3,4,5,6],ymm7[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0],ymm7[1,2,3],ymm6[4,5],ymm7[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm8[3],ymm6[4,5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm7 = ymm15[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm7[0,1,2,3],ymm2[4],ymm7[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm3 = ymm3[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm7 = ymm14[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm7 = ymm14[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm7[0,1],ymm3[2],ymm7[3,4,5,6],ymm3[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3],ymm2[4,5],ymm3[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3],ymm2[4,5,6,7]
@@ -4570,7 +4570,7 @@ define void @store_i32_stride5_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: # ymm3 = ymm3[0,1,2,3],mem[4],ymm3[5,6,7]
; AVX1-ONLY-NEXT: vpermilps $52, {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Folded Reload
; AVX1-ONLY-NEXT: # ymm6 = mem[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm10 = ymm15[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm10 = ymm15[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm10[0,1],ymm6[2],ymm10[3,4,5,6],ymm6[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0],ymm6[1,2,3],ymm3[4,5],ymm6[6,7]
; AVX1-ONLY-NEXT: vblendps $8, {{[-0-9]+}}(%r{{[sb]}}p), %ymm3, %ymm3 # 32-byte Folded Reload
@@ -4593,7 +4593,7 @@ define void @store_i32_stride5_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: # ymm3 = ymm3[0,1,2,3],mem[4],ymm3[5,6,7]
; AVX1-ONLY-NEXT: vpermilps $52, {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Folded Reload
; AVX1-ONLY-NEXT: # ymm6 = mem[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm15 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm15 = ymm13[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm15[0,1],ymm6[2],ymm15[3,4,5,6],ymm6[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0],ymm6[1,2,3],ymm3[4,5],ymm6[6,7]
; AVX1-ONLY-NEXT: vblendps $8, {{[-0-9]+}}(%r{{[sb]}}p), %ymm3, %ymm6 # 32-byte Folded Reload
@@ -4644,7 +4644,7 @@ define void @store_i32_stride5_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0,2,3,7,4,6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm1 = ymm4[0,1,3,0,4,5,7,4]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm4 = ymm5[1,0,2,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1,0,2,2]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1],ymm1[2],ymm4[3,4,5,6],ymm1[7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4,5],ymm1[6,7]
; AVX1-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7]
94 changes: 47 additions & 47 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll
@@ -592,7 +592,7 @@ define void @store_i64_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm9 = ymm0[0,1],ymm9[2,3]
; AVX1-ONLY-NEXT: vmovapd 48(%rdi), %xmm0
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],mem[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm15 = ymm15[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm15 = ymm15[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 32(%rax), %ymm1
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm15 = ymm1[2,3],ymm15[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm2 = ymm15[0],ymm0[1],ymm15[2],ymm0[3]
@@ -2174,7 +2174,7 @@ define void @store_i64_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0,1],ymm3[2,3]
; AVX1-ONLY-NEXT: vmovapd 112(%rdi), %xmm6
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm6 = ymm6[0,1],mem[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm4 = ymm4[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm4 = ymm4[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 96(%rax), %ymm8
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm8[2,3],ymm4[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm4 = ymm4[0],ymm6[1],ymm4[2],ymm6[3]
@@ -5074,7 +5074,7 @@ define void @store_i64_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm4 = ymm15[0,1],ymm0[2,3]
; AVX1-ONLY-NEXT: vmovapd 240(%rdi), %xmm15
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm15 = ymm15[0,1],mem[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 224(%rax), %ymm0
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm15[1],ymm1[2],ymm15[3]
@@ -11161,7 +11161,7 @@ define void @store_i64_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX1-ONLY-NEXT: vmovapd 432(%rdi), %xmm0
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],mem[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 416(%rax), %ymm1
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm2[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
@@ -11181,7 +11181,7 @@ define void @store_i64_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm9 = ymm3[0,1],ymm0[2,3]
; AVX1-ONLY-NEXT: vmovapd 464(%rdi), %xmm0
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm5[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 448(%rax), %ymm4
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm4[2,3],ymm2[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
@@ -11200,7 +11200,7 @@ define void @store_i64_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm2 = ymm5[0,1],ymm2[2,3]
; AVX1-ONLY-NEXT: vmovapd 496(%rdi), %xmm5
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm5 = ymm5[0,1],mem[2,3]
; AVX1-ONLY-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,0,3,2]
; AVX1-ONLY-NEXT: vshufpd {{.*#+}} ymm3 = ymm3[0,0,3,2]
; AVX1-ONLY-NEXT: vmovapd 480(%rax), %ymm11
; AVX1-ONLY-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm11[2,3],ymm3[2,3]
; AVX1-ONLY-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm5[1],ymm3[2],ymm5[3]
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-narrow-binop.ll
@@ -165,7 +165,7 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX1-NEXT: vmulpd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmulpd %xmm2, %xmm2, %xmm1
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: fmul_v2f64:
Expand All @@ -175,7 +175,7 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX2-NEXT: vmulpd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmulpd %xmm2, %xmm2, %xmm1
; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: retq
;
; AVX512-LABEL: fmul_v2f64:
@@ -184,7 +184,7 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT: vmulpd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm2 * xmm2) + xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: retq
%s = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
%bo = fmul fast <4 x double> %s, %s
132 changes: 66 additions & 66 deletions llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll

Large diffs are not rendered by default.

344 changes: 172 additions & 172 deletions llvm/test/CodeGen/X86/vector-reduce-fadd.ll

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -65,15 +65,15 @@ define float @test_v4f32(<4 x float> %a0) {
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -108,7 +108,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -119,7 +119,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -160,7 +160,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -173,7 +173,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -197,13 +197,13 @@ define double @test_v2f64(<2 x double> %a0) {
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
@@ -223,7 +223,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -232,7 +232,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -256,7 +256,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -267,7 +267,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -297,7 +297,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -309,7 +309,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -70,15 +70,15 @@ define float @test_v4f32(<4 x float> %a0) {
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -113,7 +113,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -124,7 +124,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -165,7 +165,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -178,7 +178,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -202,13 +202,13 @@ define double @test_v2f64(<2 x double> %a0) {
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a0)
@@ -238,7 +238,7 @@ define double @test_v3f64(<3 x double> %a0) {
;
; AVX-LABEL: test_v3f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
@@ -247,7 +247,7 @@ define double @test_v3f64(<3 x double> %a0) {
;
; AVX512-LABEL: test_v3f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
@@ -270,7 +270,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -279,7 +279,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -303,7 +303,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -314,7 +314,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -344,7 +344,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -356,7 +356,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
84 changes: 42 additions & 42 deletions llvm/test/CodeGen/X86/vector-reduce-fmax.ll
@@ -113,7 +113,7 @@ define float @test_v3f32(<3 x float> %a0) {
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
@@ -125,7 +125,7 @@ define float @test_v3f32(<3 x float> %a0) {
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
@@ -198,7 +198,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm0, %xmm3, %xmm4
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
@@ -214,7 +214,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm0, %xmm3, %xmm4
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
@@ -303,10 +303,10 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX-NEXT: vmaxss %xmm0, %xmm7, %xmm8
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
@@ -336,10 +336,10 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512BW-NEXT: vshufps {{.*#+}} xmm1 = xmm3[3,3,3,3]
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
; AVX512BW-NEXT: vshufps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX512BW-NEXT: vmaxss %xmm0, %xmm7, %xmm8
; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1
@@ -369,10 +369,10 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vmaxss %xmm0, %xmm7, %xmm8
; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1
@@ -502,7 +502,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1
; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
@@ -518,7 +518,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1
; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmaxss %xmm1, %xmm3, %xmm1
; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
@@ -535,7 +535,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512BW-NEXT: vmaxss %xmm0, %xmm2, %xmm3
; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1
; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3
; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
@@ -550,7 +550,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1
; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3
; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1
; AVX512BW-NEXT: vmaxss %xmm3, %xmm2, %xmm3
; AVX512BW-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
@@ -566,7 +566,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1
; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1}
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX512BW-NEXT: vcmpunordss %xmm3, %xmm3, %k1
; AVX512BW-NEXT: vmaxss %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k1}
@@ -582,7 +582,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1
; AVX512BW-NEXT: vmaxss %xmm2, %xmm1, %xmm2
; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512BW-NEXT: vcmpunordss %xmm2, %xmm2, %k1
; AVX512BW-NEXT: vmaxss %xmm2, %xmm1, %xmm2
; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
@@ -597,18 +597,18 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm3[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm6
; AVX512VL-NEXT: vshufps {{.*#+}} xmm5 = xmm6[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm7 = xmm6[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm7 = xmm6[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm8 = xmm6[1,1,3,3]
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm9
; AVX512VL-NEXT: vshufps {{.*#+}} xmm10 = xmm9[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm11 = xmm9[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm11 = xmm9[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm12 = xmm9[1,1,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm13 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm14 = xmm0[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm14 = xmm0[1,0]
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm15 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vmaxss %xmm0, %xmm15, %xmm16
; AVX512VL-NEXT: vcmpunordss %xmm0, %xmm0, %k1
@@ -682,15 +682,15 @@ define double @test_v2f64(<2 x double> %a0) {
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm0, %xmm2, %xmm1
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
@@ -740,8 +740,8 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm0, %xmm3, %xmm4
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
@@ -757,8 +757,8 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm0, %xmm3, %xmm4
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
@@ -838,7 +838,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
@@ -847,7 +847,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
@@ -856,12 +856,12 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512BW-LABEL: test_v8f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512BW-NEXT: vshufpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512BW-NEXT: vmaxsd %xmm0, %xmm7, %xmm8
; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512BW-NEXT: vmovsd %xmm7, %xmm8, %xmm8 {%k1}
@@ -889,12 +889,12 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512VL-LABEL: test_v8f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512VL-NEXT: vmaxsd %xmm0, %xmm7, %xmm8
; AVX512VL-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vmovsd %xmm7, %xmm8, %xmm8 {%k1}
@@ -1036,7 +1036,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
@@ -1045,7 +1045,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
@@ -1056,7 +1056,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxpd %zmm0, %zmm1, %zmm2
; AVX512-NEXT: vcmpunordpd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm2[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm2[1,0]
; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
@@ -1065,23 +1065,23 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm0
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm0
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm1, %k1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm2, %xmm0
; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: vzeroupper
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -74,15 +74,15 @@ define float @test_v3f32(<3 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v3f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
@@ -111,15 +111,15 @@ define float @test_v4f32(<4 x float> %a0) {
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -154,7 +154,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -165,7 +165,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -206,7 +206,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -219,7 +219,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
@@ -243,13 +243,13 @@ define double @test_v2f64(<2 x double> %a0) {
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
@@ -269,7 +269,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -278,7 +278,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -302,7 +302,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -313,7 +313,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -343,7 +343,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -355,7 +355,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq