154 changes: 44 additions & 110 deletions llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -699,34 +699,23 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
; AVX2-LABEL: stest_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rcx
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
@@ -849,9 +838,6 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-LABEL: utesth_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -860,37 +846,29 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpextrw $0, %xmm0, %edx
; AVX2-NEXT: movzwl %dx, %edx
; AVX2-NEXT: vmovd %edx, %xmm3
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm4
; AVX2-NEXT: vcvttss2si %xmm3, %rcx
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm3[0]
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -901,7 +879,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
@@ -1024,34 +1002,23 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
; AVX2-LABEL: ustest_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
@@ -3347,34 +3314,23 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-LABEL: stest_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rcx
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
@@ -3495,9 +3451,6 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-LABEL: utesth_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -3506,37 +3459,29 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpextrw $0, %xmm0, %edx
; AVX2-NEXT: movzwl %dx, %edx
; AVX2-NEXT: vmovd %edx, %xmm3
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm4
; AVX2-NEXT: vcvttss2si %xmm3, %rcx
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm3[0]
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -3547,7 +3492,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm1
@@ -3669,34 +3614,23 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-LABEL: ustest_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vpextrw $0, %xmm2, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
123 changes: 41 additions & 82 deletions llvm/test/CodeGen/X86/half.ll
@@ -1614,31 +1614,21 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
;
; BWON-F16C-LABEL: maxnum_v8f16:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vpextrw $0, %xmm2, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm2
; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm3 = [10,11,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; BWON-F16C-NEXT: vpshufb %xmm3, %xmm1, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vpextrw $0, %xmm3, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm3
; BWON-F16C-NEXT: vpshufb %xmm3, %xmm0, %xmm3
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3
; BWON-F16C-NEXT: ja .LBB26_2
; BWON-F16C-NEXT: # %bb.1:
; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3
; BWON-F16C-NEXT: .LBB26_2:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
; BWON-F16C-NEXT: vpextrw $0, %xmm3, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm3
; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; BWON-F16C-NEXT: vpshufb %xmm4, %xmm1, %xmm3
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
; BWON-F16C-NEXT: vpextrw $0, %xmm4, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm4
; BWON-F16C-NEXT: vpshufb %xmm4, %xmm0, %xmm4
; BWON-F16C-NEXT: vcvtph2ps %xmm4, %xmm4
; BWON-F16C-NEXT: vucomiss %xmm3, %xmm4
; BWON-F16C-NEXT: ja .LBB26_4
@@ -1648,49 +1638,33 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
; BWON-F16C-NEXT: vmovd %xmm2, %eax
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm4, %xmm2
; BWON-F16C-NEXT: vmovd %xmm2, %ecx
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vpextrw $0, %xmm2, %edx
; BWON-F16C-NEXT: movzwl %dx, %edx
; BWON-F16C-NEXT: vmovd %edx, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vpextrw $0, %xmm3, %edx
; BWON-F16C-NEXT: movzwl %dx, %edx
; BWON-F16C-NEXT: vmovd %edx, %xmm3
; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; BWON-F16C-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3
; BWON-F16C-NEXT: vpshufb %xmm2, %xmm0, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2
; BWON-F16C-NEXT: ja .LBB26_6
; BWON-F16C-NEXT: # %bb.5:
; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3
; BWON-F16C-NEXT: vmovaps %xmm3, %xmm2
; BWON-F16C-NEXT: .LBB26_6:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; BWON-F16C-NEXT: vmovd %xmm2, %edx
; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; BWON-F16C-NEXT: vpextrw $0, %xmm2, %esi
; BWON-F16C-NEXT: movzwl %si, %esi
; BWON-F16C-NEXT: vmovd %esi, %xmm2
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; BWON-F16C-NEXT: vpextrw $0, %xmm3, %esi
; BWON-F16C-NEXT: movzwl %si, %esi
; BWON-F16C-NEXT: vmovd %esi, %xmm3
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3
; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2
; BWON-F16C-NEXT: ja .LBB26_8
; BWON-F16C-NEXT: # %bb.7:
; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3
; BWON-F16C-NEXT: vmovaps %xmm3, %xmm2
; BWON-F16C-NEXT: .LBB26_8:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; BWON-F16C-NEXT: vmovd %xmm2, %esi
; BWON-F16C-NEXT: vpsrlq $48, %xmm1, %xmm2
; BWON-F16C-NEXT: vpextrw $0, %xmm2, %edi
; BWON-F16C-NEXT: movzwl %di, %edi
; BWON-F16C-NEXT: vmovd %edi, %xmm2
; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm3 = [4,5,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; BWON-F16C-NEXT: vpshufb %xmm3, %xmm1, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vpsrlq $48, %xmm0, %xmm3
; BWON-F16C-NEXT: vpextrw $0, %xmm3, %edi
; BWON-F16C-NEXT: movzwl %di, %edi
; BWON-F16C-NEXT: vmovd %edi, %xmm3
; BWON-F16C-NEXT: vpshufb %xmm3, %xmm0, %xmm3
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm6
; BWON-F16C-NEXT: vucomiss %xmm2, %xmm6
; BWON-F16C-NEXT: ja .LBB26_10
@@ -1703,54 +1677,39 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
; BWON-F16C-NEXT: vpinsrw $0, %esi, %xmm0, %xmm5
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm6
; BWON-F16C-NEXT: vmovd %xmm6, %eax
; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3]
; BWON-F16C-NEXT: vpextrw $0, %xmm6, %ecx
; BWON-F16C-NEXT: movzwl %cx, %ecx
; BWON-F16C-NEXT: vmovd %ecx, %xmm6
; BWON-F16C-NEXT: vpsrlq $48, %xmm1, %xmm6
; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm7
; BWON-F16C-NEXT: vpsrlq $48, %xmm0, %xmm6
; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6
; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; BWON-F16C-NEXT: vpextrw $0, %xmm7, %ecx
; BWON-F16C-NEXT: movzwl %cx, %ecx
; BWON-F16C-NEXT: vmovd %ecx, %xmm7
; BWON-F16C-NEXT: vcvtph2ps %xmm7, %xmm7
; BWON-F16C-NEXT: vucomiss %xmm6, %xmm7
; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6
; BWON-F16C-NEXT: ja .LBB26_12
; BWON-F16C-NEXT: # %bb.11:
; BWON-F16C-NEXT: vmovaps %xmm6, %xmm7
; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6
; BWON-F16C-NEXT: .LBB26_12:
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm7, %xmm5
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm5
; BWON-F16C-NEXT: vmovd %xmm5, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5
; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm6
; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm7
; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm6 = [2,3,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; BWON-F16C-NEXT: vpshufb %xmm6, %xmm1, %xmm7
; BWON-F16C-NEXT: vcvtph2ps %xmm7, %xmm7
; BWON-F16C-NEXT: vucomiss %xmm6, %xmm7
; BWON-F16C-NEXT: vpshufb %xmm6, %xmm0, %xmm6
; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6
; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6
; BWON-F16C-NEXT: ja .LBB26_14
; BWON-F16C-NEXT: # %bb.13:
; BWON-F16C-NEXT: vmovaps %xmm6, %xmm7
; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6
; BWON-F16C-NEXT: .LBB26_14:
; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm7, %xmm4
; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4
; BWON-F16C-NEXT: vmovd %xmm4, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4
; BWON-F16C-NEXT: vpsrld $16, %xmm1, %xmm1
; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm0
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT: ja .LBB26_16
@@ -1760,7 +1719,7 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; BWON-F16C-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; BWON-F16C-NEXT: retq
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/X86/pr31088.ll
@@ -41,15 +41,11 @@ define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vpextrw $0, %xmm1, %ecx
; F16C-NEXT: movzwl %cx, %ecx
; F16C-NEXT: vmovd %ecx, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movzwl %ax, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr34605.ll
@@ -17,7 +17,7 @@ define void @pr34605(ptr nocapture %s, i32 %p) {
; CHECK-NEXT: kmovd %ecx, %k1
; CHECK-NEXT: kmovd %k1, %k1
; CHECK-NEXT: kandq %k1, %k0, %k1
; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vmovdqu64 %zmm0, (%eax)
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu64 %zmm0, 64(%eax)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr38803.ll
@@ -13,7 +13,7 @@ define dso_local float @_Z3fn2v() {
; CHECK-NEXT: callq _Z1av@PLT
; CHECK-NEXT: # kill: def $al killed $al def $eax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
; CHECK-NEXT: vmovss {{.*#+}} xmm0 {%k1} {z} = [7.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: cmpl $0, c(%rip)
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr43509.ll
@@ -7,7 +7,7 @@ define <8 x i8> @foo(<8 x float> %arg) {
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k1
; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
203 changes: 74 additions & 129 deletions llvm/test/CodeGen/X86/pr57340.ll
@@ -5,54 +5,43 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-LABEL: main.41:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpbroadcastw (%rax), %xmm0
; CHECK-NEXT: vmovdqu (%rax), %ymm2
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3
; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm1 = [31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; CHECK-NEXT: vpermi2w %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NEXT: vmovdqu (%rax), %xmm5
; CHECK-NEXT: vpextrw $0, %xmm5, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vmovdqu (%rax), %ymm1
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm3 = [31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vmovdqu (%rax), %xmm10
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; CHECK-NEXT: vpshufb %xmm1, %xmm10, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm0, %xmm2
; CHECK-NEXT: vpshufb %xmm1, %xmm3, %xmm4
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4
; CHECK-NEXT: vucomiss %xmm4, %xmm2
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: vpsrld $16, %xmm1, %xmm3
; CHECK-NEXT: vpextrw $0, %xmm3, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm3
; CHECK-NEXT: vpsrld $16, %xmm5, %xmm4
; CHECK-NEXT: vpextrw $0, %xmm4, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm4
; CHECK-NEXT: setne %al
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: vcvtph2ps %xmm3, %xmm6
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm3
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: vucomiss %xmm6, %xmm3
; CHECK-NEXT: kmovd %eax, %k0
; CHECK-NEXT: kshiftlw $15, %k0, %k0
; CHECK-NEXT: kshiftrw $14, %k0, %k0
; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm10[0],zero,zero,zero,xmm10[1],zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm11
; CHECK-NEXT: vucomiss %xmm0, %xmm11
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: setne %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kshiftlw $15, %k1, %k1
; CHECK-NEXT: kshiftrw $14, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: movw $-5, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vprolq $32, %xmm1, %xmm4
; CHECK-NEXT: vpextrw $0, %xmm4, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm4
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4
; CHECK-NEXT: vucomiss %xmm4, %xmm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = [4,5,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm5
; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5
; CHECK-NEXT: vucomiss %xmm5, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -63,18 +52,12 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-9, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm4
; CHECK-NEXT: vpextrw $0, %xmm4, %eax
; CHECK-NEXT: vpsrlq $48, %xmm3, %xmm5
; CHECK-NEXT: vcvtph2ps %xmm5, %xmm6
; CHECK-NEXT: vpsrlq $48, %xmm10, %xmm5
; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm4
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm6
; CHECK-NEXT: vpsrlq $48, %xmm5, %xmm4
; CHECK-NEXT: vpextrw $0, %xmm4, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm4
; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4
; CHECK-NEXT: vucomiss %xmm6, %xmm4
; CHECK-NEXT: vucomiss %xmm6, %xmm5
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -85,13 +68,11 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-17, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
; CHECK-NEXT: vpextrw $0, %xmm6, %eax
; CHECK-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm6
; CHECK-NEXT: vcvtph2ps %xmm6, %xmm6
; CHECK-NEXT: vucomiss %xmm6, %xmm0
; CHECK-NEXT: vpshufb %xmm6, %xmm3, %xmm7
; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7
; CHECK-NEXT: vucomiss %xmm7, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -102,18 +83,13 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-33, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm6, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm6
; CHECK-NEXT: vcvtph2ps %xmm6, %xmm7
; CHECK-NEXT: vpsrldq {{.*#+}} xmm6 = xmm5[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm6, %eax
; CHECK-NEXT: vmovdqa {{.*#+}} xmm7 = [10,11,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; CHECK-NEXT: vpshufb %xmm7, %xmm10, %xmm8
; CHECK-NEXT: vcvtph2ps %xmm8, %xmm8
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm6
; CHECK-NEXT: vcvtph2ps %xmm6, %xmm6
; CHECK-NEXT: vucomiss %xmm7, %xmm6
; CHECK-NEXT: vpshufb %xmm7, %xmm3, %xmm9
; CHECK-NEXT: vcvtph2ps %xmm9, %xmm9
; CHECK-NEXT: vucomiss %xmm9, %xmm8
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -124,13 +100,11 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-65, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm9 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; CHECK-NEXT: vpshufb %xmm9, %xmm3, %xmm12
; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[3,3,3,3]
; CHECK-NEXT: vpextrw $0, %xmm7, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm7
; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7
; CHECK-NEXT: vucomiss %xmm7, %xmm0
; CHECK-NEXT: vucomiss %xmm12, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -142,17 +116,11 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: movw $-129, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpsrldq {{.*#+}} xmm7 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm7, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm7
; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7
; CHECK-NEXT: vpsrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm5, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm5
; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5
; CHECK-NEXT: vucomiss %xmm7, %xmm5
; CHECK-NEXT: vpsrldq {{.*#+}} xmm12 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12
; CHECK-NEXT: vpsrldq {{.*#+}} xmm10 = xmm10[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm10, %xmm10
; CHECK-NEXT: vucomiss %xmm12, %xmm10
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -163,13 +131,11 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-257, %ax # imm = 0xFEFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vextracti128 $1, %ymm3, %xmm3
; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm12 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
; CHECK-NEXT: vpextrw $0, %xmm1, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm7
; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7
; CHECK-NEXT: vucomiss %xmm7, %xmm2
; CHECK-NEXT: vucomiss %xmm12, %xmm11
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -181,12 +147,9 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: movw $-513, %ax # imm = 0xFDFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpsrld $16, %xmm1, %xmm2
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm2, %xmm3
; CHECK-NEXT: vpshufb %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: vucomiss %xmm1, %xmm2
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -197,13 +160,10 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-1025, %ax # imm = 0xFBFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vprolq $32, %xmm1, %xmm2
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vucomiss %xmm2, %xmm0
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -215,12 +175,9 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: movw $-2049, %ax # imm = 0xF7FF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm2
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm2, %xmm4
; CHECK-NEXT: vpsrlq $48, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: vucomiss %xmm1, %xmm5
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -231,13 +188,10 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-4097, %ax # imm = 0xEFFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm2, %xmm0
; CHECK-NEXT: vpshufb %xmm6, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -249,12 +203,9 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: movw $-8193, %ax # imm = 0xDFFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm2, %xmm6
; CHECK-NEXT: vpshufb %xmm7, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: vucomiss %xmm1, %xmm8
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -265,13 +216,10 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: movw $-16385, %ax # imm = 0xBFFF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpshufb %xmm9, %xmm3, %xmm1
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
; CHECK-NEXT: vpextrw $0, %xmm2, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm2
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
; CHECK-NEXT: vucomiss %xmm2, %xmm0
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
@@ -280,21 +228,18 @@ define void @main.41() local_unnamed_addr #1 {
; CHECK-NEXT: kshiftlw $14, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kshiftlw $1, %k0, %k0
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: vmovd %eax, %xmm0
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NEXT: kshiftrw $1, %k0, %k0
; CHECK-NEXT: vucomiss %xmm0, %xmm5
; CHECK-NEXT: vucomiss %xmm0, %xmm10
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: setne %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kshiftlw $15, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k1
; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vmovdqa %xmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/pr78897.ll
@@ -225,7 +225,7 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
; X86-AVX512-NEXT: pushl %esi
; X86-AVX512-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0
; X86-AVX512-NEXT: vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %k1
; X86-AVX512-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} {z}
; X86-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X86-AVX512-NEXT: vpextrd $1, %xmm0, %eax
; X86-AVX512-NEXT: vmovd %xmm0, %edx
; X86-AVX512-NEXT: movl $286331152, %ecx # imm = 0x11111110
@@ -258,7 +258,7 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0
; X64-AVX512-NEXT: vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; X64-AVX512-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
; X64-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
; X64-AVX512-NEXT: movabsq $76861433640456465, %rdx # imm = 0x111111111111111
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/select-of-fp-constants.ll
@@ -86,7 +86,7 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vcmpneqss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; X64-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512F-NEXT: vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
; X64-AVX512F-NEXT: vmovss {{.*#+}} xmm0 {%k1} = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512F-NEXT: retq
%c = fcmp une float %x, -4.0
%r = select i1 %c, float 42.0, float 23.0
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/select-of-half-constants.ll
@@ -6,9 +6,9 @@
define half @fcmp_select_fp_constants_olt(half %x) nounwind readnone {
; X64-AVX512FP16-LABEL: fcmp_select_fp_constants_olt:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-AVX512FP16-NEXT: vmovsh {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512FP16-NEXT: vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; X64-AVX512FP16-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX512FP16-NEXT: vmovsh {{.*#+}} xmm0 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; X64-AVX512FP16-NEXT: retq
%c = fcmp olt half %x, -4.0
@@ -19,9 +19,9 @@ define half @fcmp_select_fp_constants_olt(half %x) nounwind readnone {
define half @fcmp_select_fp_constants_ogt(half %x) nounwind readnone {
; X64-AVX512FP16-LABEL: fcmp_select_fp_constants_ogt:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-AVX512FP16-NEXT: vmovsh {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512FP16-NEXT: vcmpgtsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; X64-AVX512FP16-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX512FP16-NEXT: vmovsh {{.*#+}} xmm0 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; X64-AVX512FP16-NEXT: retq
%c = fcmp ogt half %x, -4.0
22 changes: 6 additions & 16 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4976,32 +4976,22 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
;
; F16C-LABEL: fptosi_2f16_to_4i32:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: movzwl %ax, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vpsrld $16, %xmm0, %xmm0
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: movzwl %ax, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; F16C-NEXT: retq
;
; AVX512-LABEL: fptosi_2f16_to_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrw $0, %xmm0, %eax
; AVX512-NEXT: movzwl %ax, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX512-NEXT: vpextrw $0, %xmm0, %eax
; AVX512-NEXT: movzwl %ax, %eax
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT: retq
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -413,13 +413,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vmovd %eax, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vmovd %eax, %xmm3
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
@@ -434,13 +430,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-LABEL: test_v2f16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vpextrw $0, %xmm0, %eax
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: vmovd %eax, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: vmovd %eax, %xmm3
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: xorl %eax, %eax
; AVX512VL-NEXT: vucomiss %xmm3, %xmm2
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -412,13 +412,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vmovd %eax, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vmovd %eax, %xmm3
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
@@ -433,13 +429,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-LABEL: test_v2f16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vpextrw $0, %xmm0, %eax
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: vmovd %eax, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512VL-NEXT: vpextrw $0, %xmm1, %eax
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: vmovd %eax, %xmm3
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: xorl %eax, %eax
; AVX512VL-NEXT: vucomiss %xmm3, %xmm2
6 changes: 0 additions & 6 deletions llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2012,24 +2012,18 @@ define <4 x i32> @extract3_insert0_v4i32_7123(<4 x i32> %a0, <4 x i32> %a1) {
; SSE2-LABEL: extract3_insert0_v4i32_7123:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: extract3_insert0_v4i32_7123:
; SSE3: # %bb.0:
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: movd %eax, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: extract3_insert0_v4i32_7123:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSSE3-NEXT: movd %xmm1, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: retq
;