32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/pmul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ define <2 x i64> @mul_v2i64c(<2 x i64> %i) nounwind {
;
; AVX-LABEL: mul_v2i64c:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117]
; AVX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117]
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -417,9 +417,9 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
; AVX2-LABEL: mul_v32i8c:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
Expand All @@ -430,9 +430,9 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
; AVX512F-LABEL: mul_v32i8c:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
Expand Down Expand Up @@ -593,7 +593,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
Expand All @@ -607,7 +607,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
Expand Down Expand Up @@ -799,9 +799,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX2-LABEL: mul_v64i8c:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpmullw %ymm3, %ymm0, %ymm0
Expand All @@ -820,9 +820,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
Expand All @@ -841,9 +841,9 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm0, %zmm0
Expand Down Expand Up @@ -955,7 +955,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
Expand All @@ -980,7 +980,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512F-NEXT: vpmullw %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
Expand All @@ -1004,7 +1004,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pmulh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ define <16 x i16> @and_mulhuw_v16i16(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: and_mulhuw_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/pr37499.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
define <2 x i64> @undef_tval() {
; CHECK-LABEL: undef_tval:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1}
Expand All @@ -18,7 +18,7 @@ define <2 x i64> @undef_tval() {
define <2 x i64> @foo(<8 x i64> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
Expand All @@ -33,7 +33,7 @@ define <2 x i64> @foo(<8 x i64> %x) {
define <4 x i64> @goo(<16 x i32> %x) {
; CHECK-LABEL: goo:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
define <32 x i8> @testv32i8(<32 x i8> %in) {
; AVX256-LABEL: testv32i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: # ymm1 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
Expand Down
58 changes: 44 additions & 14 deletions llvm/test/CodeGen/X86/prefer-avx256-popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ define <8 x i16> @testv8i16(<8 x i16> %in) {
define <16 x i8> @testv16i8(<16 x i8> %in) {
; AVX256-LABEL: testv16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX256-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: vpshufb %xmm2, %xmm3, %xmm2
Expand All @@ -60,9 +60,10 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
define <16 x i16> @testv16i16(<16 x i16> %in) {
; AVX256-LABEL: testv16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX256-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: # ymm3 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm0
Expand All @@ -84,17 +85,44 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
}

define <32 x i8> @testv32i8(<32 x i8> %in) {
; CHECK-LABEL: testv32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; CHECK-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
; AVX256-LABEL: testv32i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX256-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX256-NEXT: # ymm3 = mem[0,1,0,1]
; AVX256-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX256-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX256-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX256-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VL-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
%out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
ret <32 x i8> %out
}
Expand All @@ -103,3 +131,5 @@ declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) {
; AVX256BW: # %bb.0:
; AVX256BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX256BW-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX256BW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX256BW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
Expand Down Expand Up @@ -61,7 +61,7 @@ define <32 x i8> @test_mul_32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX256BW-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; AVX256BW-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX256BW-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/psubus.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2445,7 +2445,7 @@ define <64 x i8> @test27(<64 x i8> %x) {
;
; AVX2-LABEL: test27:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
; AVX2-NEXT: vpsubusb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/sadd_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -530,43 +530,43 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i4:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/sat-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
; AVX2-NEXT: # xmm1 = mem[0,0]
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
Expand Down Expand Up @@ -726,7 +726,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
Expand Down Expand Up @@ -785,7 +785,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
Expand Down Expand Up @@ -1267,7 +1267,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/setcc-non-simple-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; CHECK-AVX2-NEXT: xorl %esi, %esi
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_1: # %vector.ph
; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
Expand Down
351 changes: 236 additions & 115 deletions llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll

Large diffs are not rendered by default.

177 changes: 118 additions & 59 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,27 @@
; Ideally, the shuffles should be lowered to code with the same quality as the truncates.

define void @shuffle_v32i8_to_v16i8(ptr %L, ptr %S) nounwind {
; AVX-LABEL: shuffle_v32i8_to_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX-NEXT: vpand 16(%rdi), %xmm0, %xmm1
; AVX-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, (%rsi)
; AVX-NEXT: retq
; AVX1-LABEL: shuffle_v32i8_to_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_to_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand 16(%rdi), %xmm0, %xmm1
; AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa %xmm0, (%rsi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shuffle_v32i8_to_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand 16(%rdi), %xmm0, %xmm1
; AVX512F-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Expand All @@ -38,7 +47,7 @@ define void @shuffle_v32i8_to_v16i8(ptr %L, ptr %S) nounwind {
;
; AVX512VL-LABEL: shuffle_v32i8_to_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpand 16(%rdi), %xmm0, %xmm1
; AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -373,16 +382,27 @@ define void @trunc_v4i64_to_v4i32(ptr %L, ptr %S) nounwind {
}

define void @shuffle_v32i8_to_v8i8(ptr %L, ptr %S) nounwind {
; AVX-LABEL: shuffle_v32i8_to_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: vmovq %xmm0, (%rsi)
; AVX-NEXT: retq
; AVX1-LABEL: shuffle_v32i8_to_v8i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vmovq %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_to_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vmovq %xmm0, (%rsi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shuffle_v32i8_to_v8i8:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -427,16 +447,27 @@ define void @shuffle_v32i8_to_v8i8(ptr %L, ptr %S) nounwind {
}

define void @trunc_v8i32_to_v8i8(ptr %L, ptr %S) nounwind {
; AVX-LABEL: trunc_v8i32_to_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: vmovq %xmm0, (%rsi)
; AVX-NEXT: retq
; AVX1-LABEL: trunc_v8i32_to_v8i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vmovq %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v8i32_to_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vmovq %xmm0, (%rsi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v8i32_to_v8i8:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -498,7 +529,7 @@ define <2 x i64> @trunc_v8i32_to_v8i8_return_v2i64(<8 x i32> %vec) nounwind {
; AVX2-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
Expand Down Expand Up @@ -671,7 +702,7 @@ define <16 x i8> @trunc_v8i32_to_v8i8_return_v16i8(<8 x i32> %vec) nounwind {
; AVX2-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
Expand Down Expand Up @@ -800,7 +831,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no
;
; AVX2-FAST-ALL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-FAST-ALL-NEXT: vzeroupper
Expand Down Expand Up @@ -871,7 +903,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no
;
; AVX2-FAST-ALL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-FAST-ALL-NEXT: vzeroupper
Expand Down Expand Up @@ -999,7 +1032,7 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
; AVX2-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
Expand Down Expand Up @@ -1156,16 +1189,27 @@ define void @trunc_v4i64_to_v4i16(ptr %L, ptr %S) nounwind {
}

define void @shuffle_v32i8_to_v4i8(ptr %L, ptr %S) nounwind {
; AVX-LABEL: shuffle_v32i8_to_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: vmovd %xmm0, (%rsi)
; AVX-NEXT: retq
; AVX1-LABEL: shuffle_v32i8_to_v4i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vmovd %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_to_v4i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vmovd %xmm0, (%rsi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shuffle_v32i8_to_v4i8:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -1210,16 +1254,27 @@ define void @shuffle_v32i8_to_v4i8(ptr %L, ptr %S) nounwind {
}

define void @trunc_v4i64_to_v4i8(ptr %L, ptr %S) nounwind {
; AVX-LABEL: trunc_v4i64_to_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: vmovd %xmm0, (%rsi)
; AVX-NEXT: retq
; AVX1-LABEL: trunc_v4i64_to_v4i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vmovd %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v4i64_to_v4i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vmovd %xmm0, (%rsi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v4i64_to_v4i8:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -1281,7 +1336,8 @@ define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
; AVX2-LABEL: negative:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Expand All @@ -1291,7 +1347,8 @@ define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
; AVX512F-LABEL: negative:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Expand All @@ -1310,7 +1367,8 @@ define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
; AVX512BW-LABEL: negative:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Expand All @@ -1330,7 +1388,8 @@ define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
;
; AVX512VBMIVL-LABEL: negative:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm2 = <32,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [32,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
; AVX512VBMIVL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VBMIVL-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512VBMIVL-NEXT: vzeroupper
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512BW-LABEL: PR34175:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1
; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
Expand All @@ -392,7 +392,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512BWVL-LABEL: PR34175:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,8,16,24,u,u,u,u>
; AVX512BWVL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,8,16,24,0,8,16,24]
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
Expand All @@ -401,7 +401,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512VBMI-LABEL: PR34175:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
Expand All @@ -411,7 +411,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512VBMIVL-LABEL: PR34175:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,8,16,24,u,u,u,u>
; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,8,16,24,0,8,16,24]
; AVX512VBMIVL-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/slow-pmulld.ll
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
Expand Down Expand Up @@ -407,7 +407,7 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v8i32_v8i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
Expand Down Expand Up @@ -490,7 +490,7 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
;
; AVX2-SLOW-LABEL: test_mul_v16i32_v16i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpbroadcastw {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
; AVX2-SLOW-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX2-SLOW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2434,7 +2434,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm6 = ymm0[8],zero,ymm0[9],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[13],zero,zero,zero,ymm0[15],zero,zero,zero,ymm0[25],zero,zero,zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,zero,zero,ymm0[31],zero
; CHECK-AVX2-NEXT: vpackuswb %ymm6, %ymm4, %ymm4
Expand Down Expand Up @@ -2471,7 +2471,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; CHECK-AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sshl_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X64-AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; X64-AVX2-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
Expand Down Expand Up @@ -622,7 +622,7 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/ssub_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -530,43 +530,43 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i4:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
Expand Down
36 changes: 27 additions & 9 deletions llvm/test/CodeGen/X86/uadd_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -490,14 +490,32 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: pminub %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: v16i4:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: retq
%z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
}
Expand Down Expand Up @@ -906,7 +924,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX2-LABEL: v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1
Expand Down
25 changes: 17 additions & 8 deletions llvm/test/CodeGen/X86/umax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -369,14 +369,23 @@ define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: por %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: test_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; X86-LABEL: test_v2i64:
; X86: # %bb.0:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
;
; CHECK-AVX2-LABEL: t3_wide:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
Expand Down
36 changes: 26 additions & 10 deletions llvm/test/CodeGen/X86/usub_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -489,13 +489,29 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: psubusb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: v16i4:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: v16i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
}
Expand Down Expand Up @@ -817,7 +833,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX2-LABEL: v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
Expand Down Expand Up @@ -1112,7 +1128,7 @@ define void @PR48223(ptr %p0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX2-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdi)
Expand All @@ -1124,7 +1140,7 @@ define void @PR48223(ptr %p0) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; AVX512F-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdi)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vec_anyext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ define <4 x i8> @func_8_64(ptr %a, ptr %b) nounwind {
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0
; X64-NEXT: vmovdqa 16(%rdi), %xmm1
; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-NEXT: vpbroadcastw {{.*#+}} xmm2 = [0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8]
; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: ge_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
Expand Down Expand Up @@ -516,7 +516,7 @@ define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: gt_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -764,7 +764,7 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: le_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -939,7 +939,7 @@ define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: lt_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
Expand Down
77 changes: 51 additions & 26 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,23 @@ define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) {
; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_2i32_to_2f32:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: vcvtpd2ps %xmm0, %xmm0
; VEX-NEXT: retq
; AVX1-LABEL: uitofp_2i32_to_2f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_2i32_to_2f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f32:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -667,13 +676,21 @@ define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_2i32_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: retq
; AVX1-LABEL: uitofp_2i32_to_2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_2i32_to_2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f64:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -3343,13 +3360,21 @@ define <2 x double> @uitofp_load_2i32_to_2f64(ptr%a) {
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_load_2i32_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: retq
; AVX1-LABEL: uitofp_load_2i32_to_2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_load_2i32_to_2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -5663,10 +5688,10 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: # xmm6 = mem[0,0]
Expand All @@ -5691,10 +5716,10 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512F-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT: # xmm6 = mem[0,0]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vec_minmax_uint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; AVX2-LABEL: max_gt_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
Expand Down Expand Up @@ -477,7 +477,7 @@ define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; AVX2-LABEL: max_ge_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
Expand Down Expand Up @@ -882,7 +882,7 @@ define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; AVX2-LABEL: min_lt_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
Expand Down Expand Up @@ -1290,7 +1290,7 @@ define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; AVX2-LABEL: min_le_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vec_smulo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1816,7 +1816,7 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, ptr %p2) nounwind {
; AVX2-NEXT: vpmulhw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm1
; AVX2-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm4
Expand Down Expand Up @@ -2546,7 +2546,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
; AVX2-NEXT: vpmulhw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm3
; AVX2-NEXT: vpackuswb %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX2-NEXT: vpand %ymm6, %ymm1, %ymm1
; AVX2-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
Expand Down Expand Up @@ -2666,7 +2666,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm3, %zmm0, %zmm4
Expand Down
31 changes: 21 additions & 10 deletions llvm/test/CodeGen/X86/vec_uaddo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -855,16 +855,27 @@ define <2 x i32> @uaddo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
; SSE-NEXT: movdqa %xmm1, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: uaddo_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovdqa %xmm1, (%rdi)
; AVX-NEXT: retq
; AVX1-LABEL: uaddo_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: uaddo_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: uaddo_v2i64:
; AVX512: # %bb.0:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vec_umulo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1555,7 +1555,7 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, ptr %p2) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX2-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm5
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
Expand Down Expand Up @@ -2216,7 +2216,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm0[8],ymm4[8],ymm0[9],ymm4[9],ymm0[10],ymm4[10],ymm0[11],ymm4[11],ymm0[12],ymm4[12],ymm0[13],ymm4[13],ymm0[14],ymm4[14],ymm0[15],ymm4[15],ymm0[24],ymm4[24],ymm0[25],ymm4[25],ymm0[26],ymm4[26],ymm0[27],ymm4[27],ymm0[28],ymm4[28],ymm0[29],ymm4[29],ymm0[30],ymm4[30],ymm0[31],ymm4[31]
; AVX2-NEXT: vpmullw %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm6, %ymm5, %ymm7
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[4],ymm4[4],ymm0[5],ymm4[5],ymm0[6],ymm4[6],ymm0[7],ymm4[7],ymm0[16],ymm4[16],ymm0[17],ymm4[17],ymm0[18],ymm4[18],ymm0[19],ymm4[19],ymm0[20],ymm4[20],ymm0[21],ymm4[21],ymm0[22],ymm4[22],ymm0[23],ymm4[23]
Expand Down Expand Up @@ -2322,7 +2322,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm2[8],zmm0[9],zmm2[9],zmm0[10],zmm2[10],zmm0[11],zmm2[11],zmm0[12],zmm2[12],zmm0[13],zmm2[13],zmm0[14],zmm2[14],zmm0[15],zmm2[15],zmm0[24],zmm2[24],zmm0[25],zmm2[25],zmm0[26],zmm2[26],zmm0[27],zmm2[27],zmm0[28],zmm2[28],zmm0[29],zmm2[29],zmm0[30],zmm2[30],zmm0[31],zmm2[31],zmm0[40],zmm2[40],zmm0[41],zmm2[41],zmm0[42],zmm2[42],zmm0[43],zmm2[43],zmm0[44],zmm2[44],zmm0[45],zmm2[45],zmm0[46],zmm2[46],zmm0[47],zmm2[47],zmm0[56],zmm2[56],zmm0[57],zmm2[57],zmm0[58],zmm2[58],zmm0[59],zmm2[59],zmm0[60],zmm2[60],zmm0[61],zmm2[61],zmm0[62],zmm2[62],zmm0[63],zmm2[63]
; AVX512BW-NEXT: vpmullw %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpandq %zmm4, %zmm3, %zmm5
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm2[0],zmm0[1],zmm2[1],zmm0[2],zmm2[2],zmm0[3],zmm2[3],zmm0[4],zmm2[4],zmm0[5],zmm2[5],zmm0[6],zmm2[6],zmm0[7],zmm2[7],zmm0[16],zmm2[16],zmm0[17],zmm2[17],zmm0[18],zmm2[18],zmm0[19],zmm2[19],zmm0[20],zmm2[20],zmm0[21],zmm2[21],zmm0[22],zmm2[22],zmm0[23],zmm2[23],zmm0[32],zmm2[32],zmm0[33],zmm2[33],zmm0[34],zmm2[34],zmm0[35],zmm2[35],zmm0[36],zmm2[36],zmm0[37],zmm2[37],zmm0[38],zmm2[38],zmm0[39],zmm2[39],zmm0[48],zmm2[48],zmm0[49],zmm2[49],zmm0[50],zmm2[50],zmm0[51],zmm2[51],zmm0[52],zmm2[52],zmm0[53],zmm2[53],zmm0[54],zmm2[54],zmm0[55],zmm2[55]
Expand Down
31 changes: 21 additions & 10 deletions llvm/test/CodeGen/X86/vec_usubo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -902,16 +902,27 @@ define <2 x i32> @usubo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: usubo_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovdqa %xmm1, (%rdi)
; AVX-NEXT: retq
; AVX1-LABEL: usubo_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: usubo_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: usubo_v2i64:
; AVX512: # %bb.0:
Expand Down
408 changes: 285 additions & 123 deletions llvm/test/CodeGen/X86/vector-bitreverse.ll

Large diffs are not rendered by default.

34 changes: 23 additions & 11 deletions llvm/test/CodeGen/X86/vector-blend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_4xi8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <255,255,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: vsel_4xi8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <255,255,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_4xi8:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
ret <4 x i8> %vsel
Expand Down Expand Up @@ -262,11 +268,17 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: vsel_i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i8:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
entry:
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
ret <16 x i8> %vsel
Expand Down Expand Up @@ -627,7 +639,7 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
Expand Down
159 changes: 104 additions & 55 deletions llvm/test/CodeGen/X86/vector-fshl-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; AVX2-LABEL: var_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
Expand All @@ -95,7 +95,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; AVX512F-LABEL: var_funnnel_v2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
Expand All @@ -117,7 +117,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; AVX512BW-LABEL: var_funnnel_v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
Expand Down Expand Up @@ -167,7 +167,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; XOPAVX2-LABEL: var_funnnel_v2i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
Expand Down Expand Up @@ -547,7 +547,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; AVX512BW-LABEL: var_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
Expand All @@ -569,7 +569,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
;
; AVX512VLBW-LABEL: var_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
Expand All @@ -583,18 +583,31 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; AVX512VLVBMI2-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOP-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: var_funnnel_v8i16:
; X86-SSE2: # %bb.0:
Expand Down Expand Up @@ -779,7 +792,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
;
; AVX512F-LABEL: var_funnnel_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
Expand All @@ -797,7 +810,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
;
; AVX512VL-LABEL: var_funnnel_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
Expand Down Expand Up @@ -871,19 +884,33 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VLVBMI2-NEXT: vzeroupper
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v16i8:
; XOP: # %bb.0:
; XOP-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpshlb %xmm3, %xmm1, %xmm1
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOP-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOP-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
; XOPAVX1-LABEL: var_funnnel_v16i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm1, %xmm1
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v16i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm1, %xmm1
; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: var_funnnel_v16i8:
; X86-SSE2: # %bb.0:
Expand Down Expand Up @@ -952,20 +979,31 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: splatvar_funnnel_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
Expand All @@ -987,7 +1025,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
Expand Down Expand Up @@ -1023,16 +1061,27 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatvar_funnnel_v2i64:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1
; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vector-fshl-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512BW-LABEL: var_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
Expand All @@ -415,7 +415,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
Expand Down Expand Up @@ -451,7 +451,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
;
; XOPAVX2-LABEL: var_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
Expand Down Expand Up @@ -573,11 +573,11 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
;
; AVX512F-LABEL: var_funnnel_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw $5, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm6
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6
Expand Down Expand Up @@ -607,11 +607,11 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm6
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm6, %ymm6
Expand Down Expand Up @@ -719,7 +719,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm3, %xmm3
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm5
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm6 = [249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,249]
; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm6 = [249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,249]
; XOPAVX2-NEXT: vpaddb %xmm6, %xmm5, %xmm7
; XOPAVX2-NEXT: vpshlb %xmm7, %xmm3, %xmm3
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
Expand Down Expand Up @@ -761,7 +761,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
Expand All @@ -772,7 +772,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
Expand All @@ -794,7 +794,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
Expand Down Expand Up @@ -849,7 +849,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
Expand Down
Loading