6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
@@ -42,11 +42,11 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
; SSE-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; SSE-NEXT: pmullw %xmm5, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pmovzxbw {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm6 = [8,u,9,u,10,u,128,u,128,u,128,u,128,u,128,u]
; SSE-NEXT: pmovzxbw {{.*#+}} xmm6 = [8,9,10,128,128,128,128,128]
; SSE-NEXT: pshufb %xmm6, %xmm4
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [128,u,128,u,128,u,1,u,4,u,7,u,10,u,13,u]
; SSE-NEXT: pmovzxbw {{.*#+}} xmm7 = [128,128,128,1,4,7,10,13]
; SSE-NEXT: pshufb %xmm7, %xmm3
; SSE-NEXT: por %xmm4, %xmm3
; SSE-NEXT: pshufb %xmm6, %xmm2
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -224,7 +224,7 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpackusdw %xmm3, %xmm6, %xmm3
@@ -994,7 +994,7 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpsubq %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpackusdw %xmm3, %xmm6, %xmm3
@@ -1831,7 +1831,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm7 = [255,255]
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpackusdw %xmm3, %xmm6, %xmm3
@@ -2292,7 +2292,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm8 = [255,255]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm8 = [255,255]
; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm7
; AVX1-NEXT: vpackusdw %xmm3, %xmm7, %xmm3
93 changes: 44 additions & 49 deletions llvm/test/CodeGen/X86/vector-trunc-packus.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -56,7 +56,7 @@ define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0]
; SSE41-NEXT: movdqa %xmm4, %xmm5
@@ -179,7 +179,7 @@ define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0]
; SSE41-NEXT: movdqa %xmm4, %xmm5
@@ -333,7 +333,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [2147483647,2147483647]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0]
; SSE41-NEXT: movdqa %xmm6, %xmm5
@@ -603,7 +603,7 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="256"
; SSE41-NEXT: movdqa 32(%rdi), %xmm7
; SSE41-NEXT: movdqa 48(%rdi), %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [2147483647,2147483647]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0]
@@ -846,9 +846,9 @@ define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415]
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
@@ -980,9 +980,9 @@ define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, ptr%p1) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415]
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
@@ -1148,9 +1148,9 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: movdqa %xmm6, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm6, %xmm7
@@ -1332,9 +1332,9 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: movdqa %xmm6, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm6, %xmm7
@@ -1578,10 +1578,10 @@ define <8 x i16> @trunc_ssat_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="256"
; SSE41-NEXT: movdqa 32(%rdi), %xmm4
; SSE41-NEXT: movdqa 48(%rdi), %xmm8
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [32767,32767]
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
; SSE41-NEXT: movdqa %xmm6, %xmm3
; SSE41-NEXT: pcmpeqd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm6, %xmm9
@@ -1999,9 +1999,9 @@ define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
@@ -2145,9 +2145,9 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, ptr%p1) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
@@ -2287,9 +2287,9 @@ define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm6, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm6, %xmm7
@@ -2475,9 +2475,9 @@ define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm6, %xmm5
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm6, %xmm7
@@ -2725,10 +2725,10 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" {
; SSE41-NEXT: movdqa 32(%rdi), %xmm4
; SSE41-NEXT: movdqa 48(%rdi), %xmm8
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm6, %xmm3
; SSE41-NEXT: pcmpeqd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm6, %xmm9
@@ -3021,10 +3021,10 @@ define void @trunc_ssat_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-widt
; SSE41-NEXT: movdqa 32(%rdi), %xmm3
; SSE41-NEXT: movdqa 48(%rdi), %xmm8
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm6, %xmm2
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: movdqa %xmm6, %xmm9
@@ -3429,10 +3429,10 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256
; SSE41-NEXT: movdqa 112(%rdi), %xmm5
; SSE41-NEXT: movdqa 96(%rdi), %xmm4
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127]
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm9 = [2147483775,2147483775]
; SSE41-NEXT: movdqa %xmm9, %xmm3
; SSE41-NEXT: pcmpeqd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm9, %xmm13
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -292,7 +292,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512F-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vpmovqd %zmm1, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -302,7 +302,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512VL-LABEL: trunc_usat_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm0
; AVX512VL-NEXT: vzeroupper
@@ -312,7 +312,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -322,7 +322,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BWVL-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BWVL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm0
; AVX512BWVL-NEXT: vzeroupper
@@ -331,7 +331,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; SKX-LABEL: trunc_usat_v4i64_v4i32:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; SKX-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; SKX-NEXT: vpmovqd %ymm1, %xmm0
; SKX-NEXT: vzeroupper
@@ -2986,7 +2986,7 @@ define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) {
;
; SSE41-LABEL: trunc_usat_v8i32_v8i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
@@ -3076,7 +3076,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) {
;
; SSE41-LABEL: trunc_usat_v8i32_v8i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
@@ -3190,7 +3190,7 @@ define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) {
;
; SSE41-LABEL: trunc_usat_v16i32_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: movdqa 16(%rdi), %xmm2
; SSE41-NEXT: pminud %xmm1, %xmm2
; SSE41-NEXT: movdqa (%rdi), %xmm0
@@ -3291,7 +3291,7 @@ define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) {
;
; SSE41-LABEL: trunc_usat_v16i32_v16i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm0 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = [255,255,255,255]
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: movdqa (%rdi), %xmm2
@@ -3485,7 +3485,7 @@ define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) {
;
; SSE41-LABEL: trunc_usat_v16i16_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pminuw %xmm2, %xmm1
; SSE41-NEXT: pminuw %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
@@ -3576,7 +3576,7 @@ define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) {
;
; SSE41-LABEL: trunc_usat_v32i16_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: movdqa 48(%rdi), %xmm2
; SSE41-NEXT: pminuw %xmm0, %xmm2
; SSE41-NEXT: movdqa 32(%rdi), %xmm1
@@ -3729,7 +3729,7 @@ define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) {
;
; SSE41-LABEL: trunc_usat_v32i32_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: movdqa 80(%rdi), %xmm0
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: movdqa 64(%rdi), %xmm1
124 changes: 83 additions & 41 deletions llvm/test/CodeGen/X86/vector-trunc.ll
@@ -232,7 +232,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
;
; SSE41-LABEL: trunc8i64_8i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovsxwq {{.*#+}} xmm4 = [255,255]
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255]
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
@@ -505,7 +505,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
@@ -789,7 +789,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
;
; SSE41-LABEL: trunc16i32_16i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm4 = [255,255,255,255]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
@@ -970,14 +970,23 @@ entry:

;PR25684
define void @trunc16i16_16i8(<16 x i16> %a) {
; SSE-LABEL: trunc16i16_16i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
; SSE2-SSSE3-LABEL: trunc16i16_16i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i16_16i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8:
; AVX1: # %bb.0: # %entry
@@ -1161,18 +1170,31 @@ entry:
}

define void @trunc32i16_32i8(<32 x i16> %a) {
; SSE-LABEL: trunc32i16_32i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: movdqu %xmm2, (%rax)
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
; SSE2-SSSE3-LABEL: trunc32i16_32i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc32i16_32i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm1
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: movdqu %xmm2, (%rax)
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8:
; AVX1: # %bb.0: # %entry
@@ -1579,17 +1601,29 @@ entry:
}

define <32 x i8> @trunc2x16i16_32i8(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: trunc2x16i16_32i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm2, %xmm4
; SSE-NEXT: packuswb %xmm3, %xmm4
; SSE-NEXT: movdqa %xmm4, %xmm1
; SSE-NEXT: retq
; SSE2-SSSE3-LABEL: trunc2x16i16_32i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x16i16_32i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm1
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x16i16_32i8:
; AVX1: # %bb.0: # %entry
@@ -1651,13 +1685,21 @@ entry:
}

define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: trunc2x8i16_16i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
; SSE2-SSSE3-LABEL: trunc2x8i16_16i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x8i16_16i8:
; AVX1: # %bb.0: # %entry
208 changes: 140 additions & 68 deletions llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -9,22 +9,39 @@
; we don't need to flip the sign bits in order to map to signed pcmpgt*.
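;
; As an illustrative sketch of the identity involved (a reviewer note, not a
; generated check line): for each lane,  x <u y  <=>  (x ^ SIGN_BIT) <s (y ^ SIGN_BIT),
; which is why unsigned compares are normally lowered as a sign-bit flip
; (pxor) feeding a signed pcmpgt*. Once both operands have been logically
; shifted right by one, their sign bits are known zero, the flip becomes a
; no-op, and x <u y maps directly onto pcmpgt*.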

define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ugt_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: ugt_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $1, %xmm0
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ugt_v2i64:
; AVX: # %bb.0:
@@ -39,22 +56,39 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
}

define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ult_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: ult_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $1, %xmm0
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ult_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ult_v2i64:
; AVX: # %bb.0:
@@ -69,24 +103,43 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
}

define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: uge_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: uge_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $1, %xmm0
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uge_v2i64:
; AVX: # %bb.0:
@@ -103,24 +156,43 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
}

define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ule_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: ule_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $1, %xmm0
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ule_v2i64:
; AVX: # %bb.0:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -1182,7 +1182,7 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm3 = [2,4,32768,1]
; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1]
; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vselect-post-combine.ll
@@ -4,7 +4,7 @@
define ptr @test_mul(ptr %addr) {
; AVX2-LABEL: test_mul:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = [255,0,0,0]
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm0 = [255,0]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpblendvb %xmm0, (%rdi), %xmm1, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
238 changes: 156 additions & 82 deletions llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll

Large diffs are not rendered by default.

@@ -3311,7 +3311,7 @@ define void @vec384_i8_widen_to_i128_factor16_broadcast_to_v3i128_factor3(ptr %i
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,1,0,1]
; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm3 = [255,0,255,0]
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm3 = [255,0,255,0]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm2[2,3]
; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551615,18446744073709551360,18446744073709551615]
Expand Down Expand Up @@ -6043,7 +6043,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = [255,0,0,0]
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
@@ -6056,7 +6056,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovd {{.*#+}} xmm1 = [255,0,0,0]
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
@@ -6069,7 +6069,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovd {{.*#+}} xmm1 = [255,0,0,0]
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
@@ -2663,7 +2663,7 @@ define void @vec384_i8_widen_to_i128_factor16_broadcast_to_v3i128_factor3(ptr %i
; AVX2-LABEL: vec384_i8_widen_to_i128_factor16_broadcast_to_v3i128_factor3:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm1 = [255,0,255,0]
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = [255,0,255,0]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = mem[2,3],ymm0[2,3]
; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm3 = [18446744073709551360,18446744073709551615,18446744073709551360,18446744073709551615]
@@ -4873,7 +4873,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
;
; AVX2-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = [255,0,0,0]
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm0 = [255,0]
; AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
@@ -4884,7 +4884,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
;
; AVX512F-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = [255,0,0,0]
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = [255,0]
; AVX512F-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
@@ -4895,7 +4895,7 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
;
; AVX512DQ-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = [255,0,0,0]
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = [255,0]
; AVX512DQ-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0