221 changes: 148 additions & 73 deletions llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll


230 changes: 151 additions & 79 deletions llvm/test/CodeGen/X86/masked_store_trunc_usat.ll


115 changes: 69 additions & 46 deletions llvm/test/CodeGen/X86/midpoint-int-vec-128.ll


133 changes: 72 additions & 61 deletions llvm/test/CodeGen/X86/midpoint-int-vec-256.ll


20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
@@ -694,7 +694,7 @@ define <64 x i8> @vec512_i8_signed_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounwin
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -725,7 +725,7 @@ define <64 x i8> @vec512_i8_signed_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -780,7 +780,7 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm4, %zmm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -811,7 +811,7 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4
; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm4, %zmm4
; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -869,7 +869,7 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpsubb %ymm1, %ymm7, %ymm1
@@ -901,7 +901,7 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm7, %ymm1
@@ -959,7 +959,7 @@ define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -991,7 +991,7 @@ define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm7, %ymm2
@@ -1050,7 +1050,7 @@ define <64 x i8> @vec512_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpsubb %ymm1, %ymm7, %ymm1
@@ -1083,7 +1083,7 @@ define <64 x i8> @vec512_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm5, %zmm5
; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm7, %ymm1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -918,7 +918,7 @@ define dso_local void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-
; CHECK-AVX512-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-AVX512-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-AVX512-NEXT: vpmullw %ymm4, %ymm5, %ymm4
; CHECK-AVX512-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512-NEXT: vpand %ymm5, %ymm4, %ymm4
; CHECK-AVX512-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -995,7 +995,7 @@ define dso_local void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-
; CHECK-AVX512-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; CHECK-AVX512-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; CHECK-AVX512-NEXT: vpmullw %zmm2, %zmm3, %zmm2
; CHECK-AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512-NEXT: vpandq %zmm3, %zmm2, %zmm2
; CHECK-AVX512-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; CHECK-AVX512-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1988,7 +1988,7 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; KNL-LABEL: allones_v2i64_and1:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $3, %al
@@ -3193,7 +3193,7 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; KNL-LABEL: allones_v2i64_and4:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $3, %al
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1994,7 +1994,8 @@ define void @splat3_128(<16 x i8> %a0, <16 x i8> %a1, ptr%a2) {
; AVX2-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[5,6,7,8,9,10,11,12,13,14,15],ymm2[0,1,2,3,4],ymm1[21,22,23,24,25,26,27,28,29,30,31],ymm2[16,17,18,19,20]
; AVX2-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[5,6,7,8,9,10,11,12,13,14,15],ymm3[0,1,2,3,4],ymm2[21,22,23,24,25,26,27,28,29,30,31],ymm3[16,17,18,19,20]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX2-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-NEXT: vpshufb %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpshufb %ymm4, %ymm0, %ymm0
@@ -2165,7 +2166,8 @@ define void @splat3_256(<32 x i8> %a0, ptr%a1) {
; AVX2-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[5,6,7,8,9,10,11,12,13,14,15],ymm2[0,1,2,3,4],ymm1[21,22,23,24,25,26,27,28,29,30,31],ymm2[16,17,18,19,20]
; AVX2-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[5,6,7,8,9,10,11,12,13,14,15],ymm3[0,1,2,3,4],ymm2[21,22,23,24,25,26,27,28,29,30,31],ymm3[16,17,18,19,20]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
; AVX2-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-NEXT: vpshufb %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpshufb %ymm4, %ymm0, %ymm0
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/X86/paddus.ll
@@ -219,7 +219,7 @@ define <32 x i8> @test8(<32 x i8> %x) {
; AVX1-LABEL: test8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpaddusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -251,7 +251,7 @@ define <32 x i8> @test9(<32 x i8> %x) {
; AVX1-LABEL: test9:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX1-NEXT: vpaddusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -283,7 +283,7 @@ define <32 x i8> @test10(<32 x i8> %x) {
; AVX1-LABEL: test10:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX1-NEXT: vpaddusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -373,7 +373,7 @@ define <32 x i8> @test12(<32 x i8> %x) {
; AVX1-LABEL: test12:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: vpaddusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -474,7 +474,7 @@ define <64 x i8> @test14(<64 x i8> %x) {
; AVX1-LABEL: test14:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpaddusb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -486,7 +486,7 @@ define <64 x i8> @test14(<64 x i8> %x) {
;
; AVX2-LABEL: test14:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpaddusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -514,7 +514,7 @@ define <64 x i8> @test15(<64 x i8> %x) {
; AVX1-LABEL: test15:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX1-NEXT: vpaddusb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -526,7 +526,7 @@ define <64 x i8> @test15(<64 x i8> %x) {
;
; AVX2-LABEL: test15:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
; AVX2-NEXT: vpaddusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -554,7 +554,7 @@ define <64 x i8> @test16(<64 x i8> %x) {
; AVX1-LABEL: test16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX1-NEXT: vpaddusb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -566,7 +566,7 @@ define <64 x i8> @test16(<64 x i8> %x) {
;
; AVX2-LABEL: test16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
; AVX2-NEXT: vpaddusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -641,7 +641,7 @@ define <64 x i8> @test17(<64 x i8> %x) {
;
; AVX2-LABEL: test17:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpmaxub %ymm1, %ymm2, %ymm1
@@ -682,7 +682,7 @@ define <64 x i8> @test18(<64 x i8> %x) {
; AVX1-LABEL: test18:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: vpaddusb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -694,7 +694,7 @@ define <64 x i8> @test18(<64 x i8> %x) {
;
; AVX2-LABEL: test18:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -939,7 +939,7 @@ define <16 x i16> @test26(<16 x i16> %x) {
; AVX1-LABEL: test26:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpaddusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -971,7 +971,7 @@ define <16 x i16> @test27(<16 x i16> %x) {
; AVX1-LABEL: test27:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32769,32769,32769,32769,32769,32769,32769,32769]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32769,32769,32769,32769,32769,32769,32769,32769]
; AVX1-NEXT: vpaddusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1003,7 +1003,7 @@ define <16 x i16> @test28(<16 x i16> %x) {
; AVX1-LABEL: test28:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65534,65534,65534,65534,65534,65534,65534,65534]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [65534,65534,65534,65534,65534,65534,65534,65534]
; AVX1-NEXT: vpaddusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1125,7 +1125,7 @@ define <16 x i16> @test30(<16 x i16> %x) {
; AVX1-LABEL: test30:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX1-NEXT: vpaddusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1226,7 +1226,7 @@ define <32 x i16> @test32(<32 x i16> %x) {
; AVX1-LABEL: test32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpaddusw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1238,7 +1238,7 @@ define <32 x i16> @test32(<32 x i16> %x) {
;
; AVX2-LABEL: test32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -1266,7 +1266,7 @@ define <32 x i16> @test33(<32 x i16> %x) {
; AVX1-LABEL: test33:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
; AVX1-NEXT: vpaddusw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1278,7 +1278,7 @@ define <32 x i16> @test33(<32 x i16> %x) {
;
; AVX2-LABEL: test33:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769,32769]
; AVX2-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -1306,7 +1306,7 @@ define <32 x i16> @test34(<32 x i16> %x) {
; AVX1-LABEL: test34:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65534,65534,65534,65534,65534,65534,65534,65534]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65534,65534,65534,65534,65534,65534,65534,65534]
; AVX1-NEXT: vpaddusw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1318,7 +1318,7 @@ define <32 x i16> @test34(<32 x i16> %x) {
;
; AVX2-LABEL: test34:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534,65534]
; AVX2-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -1449,7 +1449,7 @@ define <32 x i16> @test35(<32 x i16> %x) {
;
; AVX2-LABEL: test35:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpmaxuw %ymm1, %ymm2, %ymm1
@@ -1490,7 +1490,7 @@ define <32 x i16> @test36(<32 x i16> %x) {
; AVX1-LABEL: test36:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2]
; AVX1-NEXT: vpaddusw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddusw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1502,7 +1502,7 @@ define <32 x i16> @test36(<32 x i16> %x) {
;
; AVX2-LABEL: test36:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/pmaddubsw.ll
@@ -320,7 +320,8 @@ define <8 x i16> @pmaddubsw_bad_extend(ptr %Aptr, ptr %Bptr) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa (%rsi), %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
@@ -348,9 +349,9 @@ define <8 x i16> @pmaddubsw_bad_extend(ptr %Aptr, ptr %Bptr) {
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %xmm0
; AVX256-NEXT: vmovdqa (%rsi), %xmm1
; AVX256-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX256-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
; AVX256-NEXT: vpshufb %xmm2, %xmm0, %xmm3
; AVX256-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
; AVX256-NEXT: vpbroadcastq {{.*#+}} xmm4 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
; AVX256-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX256-NEXT: vpshufb %xmm2, %xmm1, %xmm2
; AVX256-NEXT: vpshufb %xmm4, %xmm1, %xmm1
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/pmul.ll
@@ -122,7 +122,7 @@ define <2 x i64> @mul_v2i64c(<2 x i64> %i) nounwind {
;
; AVX-LABEL: mul_v2i64c:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117]
; AVX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117]
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
@@ -417,9 +417,9 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
; AVX2-LABEL: mul_v32i8c:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
@@ -430,9 +430,9 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
; AVX512F-LABEL: mul_v32i8c:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
@@ -593,7 +593,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -607,7 +607,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -799,9 +799,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX2-LABEL: mul_v64i8c:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpmullw %ymm3, %ymm0, %ymm0
@@ -820,9 +820,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
@@ -841,9 +841,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm0, %zmm0
@@ -955,7 +955,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -980,7 +980,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpmullw %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -1004,7 +1004,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pmulh.ll
@@ -372,7 +372,7 @@ define <16 x i16> @and_mulhuw_v16i16(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: and_mulhuw_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/pr31773.ll
@@ -8,7 +8,7 @@ define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; AVX-LABEL: usat_trunc_wb_256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX-NEXT: vpminuw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpminuw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
@@ -30,7 +30,7 @@ define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
; AVX-LABEL: usat_trunc_dw_256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [65535,65535,65535,65535]
; AVX-NEXT: vpminud %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/pr37499.ll
@@ -4,7 +4,7 @@
define <2 x i64> @undef_tval() {
; CHECK-LABEL: undef_tval:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1}
@@ -18,7 +18,7 @@ define <2 x i64> @undef_tval() {
define <2 x i64> @foo(<8 x i64> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
@@ -33,7 +33,7 @@ define <4 x i64> @goo(<16 x i32> %x) {
define <4 x i64> @goo(<16 x i32> %x) {
; CHECK-LABEL: goo:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/pr63108.ll
@@ -46,7 +46,7 @@ define i32 @PR63108() {
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: je .LBB0_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <251,223,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [251,223,0,0,251,223,0,0,251,223,0,0,251,223,0,0]
; AVX1-NEXT: jmp .LBB0_5
; AVX1-NEXT: .LBB0_2: # %vector.body.preheader
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [57339,0,0,0]
@@ -83,7 +83,7 @@ define i32 @PR63108() {
; AVX2-NEXT: testb %al, %al
; AVX2-NEXT: je .LBB0_2
; AVX2-NEXT: # %bb.1:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = <251,223,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [251,223,251,223,251,223,251,223,251,223,251,223,251,223,251,223]
; AVX2-NEXT: jmp .LBB0_5
; AVX2-NEXT: .LBB0_2: # %vector.body.preheader
; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [57339,0,0,0]
@@ -120,7 +120,7 @@ define i32 @PR63108() {
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: je .LBB0_2
; AVX512-NEXT: # %bb.1:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = <251,223,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [251,223,251,223,251,223,251,223,251,223,251,223,251,223,251,223]
; AVX512-NEXT: jmp .LBB0_5
; AVX512-NEXT: .LBB0_2: # %vector.body.preheader
; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = [57339,0,0,0]
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
@@ -89,7 +89,8 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
define <32 x i8> @testv32i8(<32 x i8> %in) {
; AVX256-LABEL: testv32i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: # ymm1 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
58 changes: 44 additions & 14 deletions llvm/test/CodeGen/X86/prefer-avx256-popcnt.ll
@@ -36,7 +36,7 @@ define <8 x i16> @testv8i16(<8 x i16> %in) {
define <16 x i8> @testv16i8(<16 x i8> %in) {
; AVX256-LABEL: testv16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX256-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: vpshufb %xmm2, %xmm3, %xmm2
@@ -60,9 +60,10 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
define <16 x i16> @testv16i16(<16 x i16> %in) {
; AVX256-LABEL: testv16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX256-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: # ymm3 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm0
@@ -84,17 +85,44 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
}

define <32 x i8> @testv32i8(<32 x i8> %in) {
; CHECK-LABEL: testv32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; CHECK-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
; AVX256-LABEL: testv32i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: # ymm3 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX256-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VL-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
%out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
ret <32 x i8> %out
}
@@ -103,3 +131,5 @@ declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
@@ -9,7 +9,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) {
; AVX256BW: # %bb.0:
; AVX256BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX256BW-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX256BW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX256BW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -61,7 +61,7 @@ define <32 x i8> @test_mul_32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX256BW-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX256BW-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX256BW-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
58 changes: 32 additions & 26 deletions llvm/test/CodeGen/X86/psubus.ll
@@ -131,7 +131,7 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
;
; AVX1-LABEL: ashr_xor_and_custom:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@@ -172,7 +172,7 @@ define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
;
; AVX1-LABEL: ashr_add_and_custom:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@@ -215,7 +215,7 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
;
; AVX1-LABEL: usubsat_custom:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@@ -391,7 +391,7 @@ define <16 x i16> @test7(<16 x i16> %x) nounwind {
; AVX1-LABEL: test7:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -424,7 +424,7 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
; AVX1-LABEL: ashr_xor_and_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -456,7 +456,7 @@ define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
; AVX1-LABEL: ashr_add_and_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -488,7 +488,7 @@ define <16 x i16> @test8(<16 x i16> %x) nounwind {
; AVX1-LABEL: test8:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -594,7 +594,7 @@ define <32 x i8> @test10(<32 x i8> %x) nounwind {
; AVX1-LABEL: test10:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -627,7 +627,7 @@ define <32 x i8> @test11(<32 x i8> %x) nounwind {
; AVX1-LABEL: test11:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -803,7 +803,7 @@ define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind {
; AVX1-LABEL: test13:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -1069,7 +1069,7 @@ define <8 x i16> @test15(<8 x i16> %x, <8 x i32> %y) nounwind {
; AVX1-LABEL: test15:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -1592,7 +1592,7 @@ define <8 x i16> @psubus_8i32_max(<8 x i16> %x, <8 x i32> %y) nounwind {
; AVX1-LABEL: psubus_8i32_max:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -1742,9 +1742,11 @@ define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind {
; AVX1-LABEL: psubus_8i64_max:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [65535,65535]
; AVX1-NEXT: # xmm7 = mem[0,0]
@@ -1862,7 +1864,7 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
; AVX1-LABEL: psubus_16i32_max:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpminud %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
@@ -1962,7 +1964,7 @@ define <8 x i16> @psubus_i16_i32_max_swapped(<8 x i16> %x, <8 x i32> %y) nounwin
; AVX1-LABEL: psubus_i16_i32_max_swapped:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -2057,7 +2059,7 @@ define <8 x i16> @psubus_i16_i32_min(<8 x i16> %x, <8 x i32> %y) nounwind {
; AVX1-LABEL: psubus_i16_i32_min:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -2303,7 +2305,7 @@ define <32 x i8> @test23(<32 x i8> %x) {
; AVX1-LABEL: test23:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70]
; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -2367,7 +2369,7 @@ define <16 x i16> @test25(<16 x i16> %x) {
; AVX1-LABEL: test25:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [5000,5000,5000,5000,5000,5000,5000,5000]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [5000,5000,5000,5000,5000,5000,5000,5000]
; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -2433,7 +2435,7 @@ define <64 x i8> @test27(<64 x i8> %x) {
; AVX1-LABEL: test27:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX1-NEXT: vpsubusb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsubusb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -2445,7 +2447,7 @@ define <64 x i8> @test27(<64 x i8> %x) {
;
; AVX2-LABEL: test27:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX2-NEXT: vpsubusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -2646,7 +2648,7 @@ define <8 x i16> @test32(<8 x i16> %a0, <8 x i32> %a1) {
; AVX1-LABEL: test32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
@@ -2800,9 +2802,11 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
; AVX1-LABEL: test33:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [4294967295,4294967295]
; AVX1-NEXT: # xmm7 = mem[0,0]
@@ -3025,9 +3029,11 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) {
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [4294967295,4294967295]
; AVX1-NEXT: # xmm7 = mem[0,0]
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -515,58 +515,58 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i4:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/sat-add.ll
@@ -659,7 +659,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
; AVX2-NEXT: # xmm1 = mem[0,0]
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -726,7 +726,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
@@ -785,7 +785,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
@@ -1267,7 +1267,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -108,8 +108,8 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; CHECK-AVX2-NEXT: xorl %esi, %esi
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_1: # %vector.ph
; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -2134,7 +2134,7 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind {
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vmovdqa %xmm1, (%eax)
@@ -2337,7 +2337,7 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind {
; X64-AVX1-NEXT: vpinsrd $1, %ebp, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $3, %r11d, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %r10d, %xmm2
; X64-AVX1-NEXT: vpinsrd $1, %r9d, %xmm2, %xmm2
348 changes: 234 additions & 114 deletions llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll

Large diffs are not rendered by default.

183 changes: 121 additions & 62 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -382,7 +382,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512BW-LABEL: PR34175:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1
; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
@@ -392,7 +392,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512BWVL-LABEL: PR34175:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,8,16,24,u,u,u,u>
; AVX512BWVL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,8,16,24,0,8,16,24]
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
@@ -401,7 +401,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512VBMI-LABEL: PR34175:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
@@ -411,7 +411,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512VBMIVL-LABEL: PR34175:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,8,16,24,u,u,u,u>
; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,8,16,24,0,8,16,24]
; AVX512VBMIVL-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -347,7 +347,7 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
@@ -407,7 +407,7 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v8i32_v8i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
@@ -490,7 +490,7 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v16i32_v16i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX2-SLOW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15]
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/splat-for-size.ll
@@ -274,7 +274,7 @@ define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -293,7 +293,7 @@ define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v16i16_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -344,7 +344,7 @@ define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -363,7 +363,7 @@ define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v32i8_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2362,12 +2362,12 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; CHECK-AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
; CHECK-AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm4
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm6
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6
; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4
@@ -2434,7 +2434,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm6 = ymm0[8],zero,ymm0[9],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[13],zero,zero,zero,ymm0[15],zero,zero,zero,ymm0[25],zero,zero,zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,zero,zero,ymm0[31],zero
; CHECK-AVX2-NEXT: vpackuswb %ymm6, %ymm4, %ymm4
@@ -2471,7 +2471,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; CHECK-AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -343,7 +343,7 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_odd_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -454,7 +454,7 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_even_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -353,7 +353,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X64-AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; X64-AVX2-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
@@ -622,7 +622,7 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -515,58 +515,58 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i4:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
45 changes: 33 additions & 12 deletions llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -490,14 +490,32 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: pminub %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: v16i4:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: retq
%z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
}
@@ -896,7 +914,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX1-LABEL: v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm1
@@ -906,7 +925,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX2-LABEL: v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1
@@ -972,7 +991,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX1-LABEL: v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpaddq %xmm5, %xmm2, %xmm2
@@ -1082,7 +1102,8 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; AVX1-LABEL: v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm5 = mem[0,0]
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
; AVX1-NEXT: vpaddq %xmm7, %xmm4, %xmm4
28 changes: 19 additions & 9 deletions llvm/test/CodeGen/X86/umax.ll
@@ -369,14 +369,24 @@ define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: por %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: test_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; X86-LABEL: test_v2i64:
; X86: # %bb.0:
@@ -780,7 +790,7 @@ define <8 x i32> @test_v8i32_1(<8 x i32> %a) nounwind {
; AVX1-LABEL: test_v8i32_1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
; AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -251,7 +251,7 @@ define <4 x i32> @out_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
@@ -289,7 +289,7 @@ define <4 x i32> @in_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
@@ -459,7 +459,7 @@ define <4 x i32> @out_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
@@ -496,7 +496,7 @@ define <4 x i32> @in_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -194,7 +194,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2047,2047,2047,2047]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3,4,5,6,7]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
@@ -287,7 +287,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_urem_odd_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -382,7 +382,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_urem_even_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -221,7 +221,8 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
;
; CHECK-AVX1-LABEL: t3_wide:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX1-NEXT: # xmm1 = mem[0,0]
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
@@ -238,7 +239,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
;
; CHECK-AVX2-LABEL: t3_wide:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
47 changes: 33 additions & 14 deletions llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -489,13 +489,29 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: psubusb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: v16i4:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
}
@@ -807,7 +823,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX1-LABEL: v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -817,7 +834,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX2-LABEL: v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -878,7 +895,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX1-LABEL: v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
@@ -981,7 +999,8 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; AVX1-LABEL: v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm5 = mem[0,0]
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm8
@@ -1097,7 +1116,7 @@ define void @PR48223(ptr %p0) {
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64,64,64,64,64,64,64]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [64,64,64,64,64,64,64,64]
; AVX1-NEXT: vpsubusw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsubusw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubusw %xmm4, %xmm1, %xmm1
@@ -1112,7 +1131,7 @@ define void @PR48223(ptr %p0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX2-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdi)
@@ -1124,7 +1143,7 @@ define void @PR48223(ptr %p0) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX512F-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdi)