110 changes: 36 additions & 74 deletions llvm/test/Analysis/CostModel/X86/rem.ll

Large diffs are not rendered by default.

530 changes: 176 additions & 354 deletions llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
Expand Down Expand Up @@ -309,7 +309,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512BW-LABEL: 'test_vXi8'
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
Expand Down
41 changes: 14 additions & 27 deletions llvm/test/Analysis/CostModel/X86/trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -120,31 +120,18 @@ define i32 @trunc_vXi16() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'trunc_vXi16'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'trunc_vXi16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
; AVX512-LABEL: 'trunc_vXi16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'trunc_vXi16'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
Expand Down Expand Up @@ -285,13 +272,13 @@ define i32 @trunc_vXi8() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
Expand Down
170 changes: 56 additions & 114 deletions llvm/test/Analysis/CostModel/X86/vector-extract.ll

Large diffs are not rendered by default.

170 changes: 56 additions & 114 deletions llvm/test/Analysis/CostModel/X86/vector-insert.ll

Large diffs are not rendered by default.

92 changes: 45 additions & 47 deletions llvm/test/CodeGen/X86/avg-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,33 +123,32 @@ define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwin
define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movl %edi, %ecx
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpavgb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm1
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm5, %xmm5
; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm2, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_mask:
Expand Down Expand Up @@ -178,26 +177,26 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512F-NEXT: vpandq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_maskz:
Expand Down Expand Up @@ -327,21 +326,20 @@ define <16 x i16> @avg_v16i16_maskz(<16 x i16> %a, <16 x i16> %b, i16 %mask) nou
define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpavgw %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm1
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm2, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_mask:
Expand All @@ -366,18 +364,18 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vpandq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_maskz:
Expand Down
793 changes: 323 additions & 470 deletions llvm/test/CodeGen/X86/avg.ll

Large diffs are not rendered by default.

75 changes: 23 additions & 52 deletions llvm/test/CodeGen/X86/avx512-calling-conv.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL --check-prefix=KNL-NEW
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl -x86-enable-old-knl-abi | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL --check-prefix=KNL-OLD
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL
Expand Down Expand Up @@ -552,31 +551,17 @@ define <1 x i1> @test13(<1 x i1>* %foo) {
}

define void @test14(<32 x i16>* %x) {
; KNL-NEW-LABEL: test14:
; KNL-NEW: ## %bb.0:
; KNL-NEW-NEXT: pushq %rbx
; KNL-NEW-NEXT: .cfi_def_cfa_offset 16
; KNL-NEW-NEXT: .cfi_offset %rbx, -16
; KNL-NEW-NEXT: movq %rdi, %rbx
; KNL-NEW-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEW-NEXT: callq _test14_callee
; KNL-NEW-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEW-NEXT: popq %rbx
; KNL-NEW-NEXT: retq
;
; KNL-OLD-LABEL: test14:
; KNL-OLD: ## %bb.0:
; KNL-OLD-NEXT: pushq %rbx
; KNL-OLD-NEXT: .cfi_def_cfa_offset 16
; KNL-OLD-NEXT: .cfi_offset %rbx, -16
; KNL-OLD-NEXT: movq %rdi, %rbx
; KNL-OLD-NEXT: vmovaps (%rdi), %ymm0
; KNL-OLD-NEXT: vmovaps 32(%rdi), %ymm1
; KNL-OLD-NEXT: callq _test14_callee
; KNL-OLD-NEXT: vmovaps %ymm1, 32(%rbx)
; KNL-OLD-NEXT: vmovaps %ymm0, (%rbx)
; KNL-OLD-NEXT: popq %rbx
; KNL-OLD-NEXT: retq
; KNL-LABEL: test14:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbx
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbx, -16
; KNL-NEXT: movq %rdi, %rbx
; KNL-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEXT: callq _test14_callee
; KNL-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEXT: popq %rbx
; KNL-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## %bb.0:
Expand Down Expand Up @@ -626,31 +611,17 @@ define void @test14(<32 x i16>* %x) {
declare <32 x i16> @test14_callee(<32 x i16>)

define void @test15(<64 x i8>* %x) {
; KNL-NEW-LABEL: test15:
; KNL-NEW: ## %bb.0:
; KNL-NEW-NEXT: pushq %rbx
; KNL-NEW-NEXT: .cfi_def_cfa_offset 16
; KNL-NEW-NEXT: .cfi_offset %rbx, -16
; KNL-NEW-NEXT: movq %rdi, %rbx
; KNL-NEW-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEW-NEXT: callq _test15_callee
; KNL-NEW-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEW-NEXT: popq %rbx
; KNL-NEW-NEXT: retq
;
; KNL-OLD-LABEL: test15:
; KNL-OLD: ## %bb.0:
; KNL-OLD-NEXT: pushq %rbx
; KNL-OLD-NEXT: .cfi_def_cfa_offset 16
; KNL-OLD-NEXT: .cfi_offset %rbx, -16
; KNL-OLD-NEXT: movq %rdi, %rbx
; KNL-OLD-NEXT: vmovaps (%rdi), %ymm0
; KNL-OLD-NEXT: vmovaps 32(%rdi), %ymm1
; KNL-OLD-NEXT: callq _test15_callee
; KNL-OLD-NEXT: vmovaps %ymm1, 32(%rbx)
; KNL-OLD-NEXT: vmovaps %ymm0, (%rbx)
; KNL-OLD-NEXT: popq %rbx
; KNL-OLD-NEXT: retq
; KNL-LABEL: test15:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbx
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbx, -16
; KNL-NEXT: movq %rdi, %rbx
; KNL-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEXT: callq _test15_callee
; KNL-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEXT: popq %rbx
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## %bb.0:
Expand Down
262 changes: 128 additions & 134 deletions llvm/test/CodeGen/X86/avx512-ext.ll

Large diffs are not rendered by default.

460 changes: 192 additions & 268 deletions llvm/test/CodeGen/X86/avx512-insert-extract.ll

Large diffs are not rendered by default.

24 changes: 8 additions & 16 deletions llvm/test/CodeGen/X86/avx512-logic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ entry:
define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: and_v64i8:
; KNL: ## %bb.0:
; KNL-NEXT: vpandd %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: and_v64i8:
Expand All @@ -180,11 +180,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: andn_v64i8:
; KNL: ## %bb.0:
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; KNL-NEXT: vandnps %ymm2, %ymm3, %ymm2
; KNL-NEXT: vandnps %ymm0, %ymm1, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: andn_v64i8:
Expand All @@ -202,7 +198,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: or_v64i8:
; KNL: ## %bb.0:
; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: or_v64i8:
Expand All @@ -216,7 +212,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: xor_v64i8:
; KNL: ## %bb.0:
; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: xor_v64i8:
Expand All @@ -230,7 +226,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: and_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpandd %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: and_v32i16:
Expand All @@ -244,11 +240,7 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: andn_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; KNL-NEXT: vandnps %ymm2, %ymm3, %ymm2
; KNL-NEXT: vandnps %ymm0, %ymm1, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: andn_v32i16:
Expand All @@ -264,7 +256,7 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: or_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: or_v32i16:
Expand All @@ -278,7 +270,7 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: xor_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: xor_v32i16:
Expand Down
340 changes: 144 additions & 196 deletions llvm/test/CodeGen/X86/avx512-mask-op.ll

Large diffs are not rendered by default.

120 changes: 32 additions & 88 deletions llvm/test/CodeGen/X86/avx512-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -434,101 +434,45 @@ define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
}

define <32 x i16> @pr42355_v32i16(i1 %c, <32 x i16> %x, <32 x i16> %y) {
; X86-AVX512F-LABEL: pr42355_v32i16:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX512F-NEXT: jne .LBB14_1
; X86-AVX512F-NEXT: # %bb.2:
; X86-AVX512F-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; X86-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X86-AVX512F-NEXT: retl
; X86-AVX512F-NEXT: .LBB14_1:
; X86-AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: pr42355_v32i16:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: testb $1, %dil
; X64-AVX512F-NEXT: jne .LBB14_1
; X64-AVX512F-NEXT: # %bb.2:
; X64-AVX512F-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; X64-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-AVX512F-NEXT: retq
; X64-AVX512F-NEXT: .LBB14_1:
; X64-AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-AVX512F-NEXT: retq
;
; X86-AVX512BW-LABEL: pr42355_v32i16:
; X86-AVX512BW: # %bb.0:
; X86-AVX512BW-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX512BW-NEXT: jne .LBB14_2
; X86-AVX512BW-NEXT: # %bb.1:
; X86-AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; X86-AVX512BW-NEXT: .LBB14_2:
; X86-AVX512BW-NEXT: retl
; X86-LABEL: pr42355_v32i16:
; X86: # %bb.0:
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: jne .LBB14_2
; X86-NEXT: # %bb.1:
; X86-NEXT: vmovaps %zmm1, %zmm0
; X86-NEXT: .LBB14_2:
; X86-NEXT: retl
;
; X64-AVX512BW-LABEL: pr42355_v32i16:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: testb $1, %dil
; X64-AVX512BW-NEXT: jne .LBB14_2
; X64-AVX512BW-NEXT: # %bb.1:
; X64-AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; X64-AVX512BW-NEXT: .LBB14_2:
; X64-AVX512BW-NEXT: retq
; X64-LABEL: pr42355_v32i16:
; X64: # %bb.0:
; X64-NEXT: testb $1, %dil
; X64-NEXT: jne .LBB14_2
; X64-NEXT: # %bb.1:
; X64-NEXT: vmovaps %zmm1, %zmm0
; X64-NEXT: .LBB14_2:
; X64-NEXT: retq
%a = select i1 %c, <32 x i16> %x, <32 x i16> %y
ret <32 x i16> %a
}

define <64 x i8> @pr42355_v64i8(i1 %c, <64 x i8> %x, <64 x i8> %y) {
; X86-AVX512F-LABEL: pr42355_v64i8:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX512F-NEXT: jne .LBB15_1
; X86-AVX512F-NEXT: # %bb.2:
; X86-AVX512F-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; X86-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X86-AVX512F-NEXT: retl
; X86-AVX512F-NEXT: .LBB15_1:
; X86-AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: pr42355_v64i8:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: testb $1, %dil
; X64-AVX512F-NEXT: jne .LBB15_1
; X64-AVX512F-NEXT: # %bb.2:
; X64-AVX512F-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; X64-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-AVX512F-NEXT: retq
; X64-AVX512F-NEXT: .LBB15_1:
; X64-AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-AVX512F-NEXT: retq
;
; X86-AVX512BW-LABEL: pr42355_v64i8:
; X86-AVX512BW: # %bb.0:
; X86-AVX512BW-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX512BW-NEXT: jne .LBB15_2
; X86-AVX512BW-NEXT: # %bb.1:
; X86-AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; X86-AVX512BW-NEXT: .LBB15_2:
; X86-AVX512BW-NEXT: retl
; X86-LABEL: pr42355_v64i8:
; X86: # %bb.0:
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: jne .LBB15_2
; X86-NEXT: # %bb.1:
; X86-NEXT: vmovaps %zmm1, %zmm0
; X86-NEXT: .LBB15_2:
; X86-NEXT: retl
;
; X64-AVX512BW-LABEL: pr42355_v64i8:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: testb $1, %dil
; X64-AVX512BW-NEXT: jne .LBB15_2
; X64-AVX512BW-NEXT: # %bb.1:
; X64-AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; X64-AVX512BW-NEXT: .LBB15_2:
; X64-AVX512BW-NEXT: retq
; X64-LABEL: pr42355_v64i8:
; X64: # %bb.0:
; X64-NEXT: testb $1, %dil
; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1:
; X64-NEXT: vmovaps %zmm1, %zmm0
; X64-NEXT: .LBB15_2:
; X64-NEXT: retq
%a = select i1 %c, <64 x i8> %x, <64 x i8> %y
ret <64 x i8> %a
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512-trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -454,12 +454,12 @@ define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
; KNL-LABEL: trunc_wb_512:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_wb_512:
Expand Down
20 changes: 12 additions & 8 deletions llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,11 @@ define <16 x i32> @test_broadcast_4i32_16i32(<4 x i32> *%p) nounwind {
define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_8i16_32i16:
Expand All @@ -148,10 +149,11 @@ define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
%1 = load <8 x i16>, <8 x i16> *%p
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Expand All @@ -162,10 +164,11 @@ define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_16i8_64i8:
Expand All @@ -176,10 +179,11 @@ define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
%1 = load <16 x i8>, <16 x i8> *%p
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down
20 changes: 12 additions & 8 deletions llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512VL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_16i16_32i16:
Expand All @@ -68,10 +69,11 @@ define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512DQVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
%1 = load <16 x i16>, <16 x i16> *%p
%2 = shufflevector <16 x i16> %1, <16 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand All @@ -82,10 +84,11 @@ define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512VL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_32i8_64i8:
Expand All @@ -96,10 +99,11 @@ define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512DQVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
%1 = load <32 x i8>, <32 x i8> *%p
%2 = shufflevector <32 x i8> %1, <32 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
Expand Down
16 changes: 10 additions & 6 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -245,21 +245,25 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-LABEL: test12_v64i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc4]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm4 ## encoding: [0xc5,0xfd,0x75,0xe2]
; KNL-NEXT: vpmovsxwd %ymm4, %zmm4 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xe4]
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 ## encoding: [0x62,0xf2,0x5d,0x48,0x27,0xc4]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc5]
; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd2,0x01]
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc2]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; KNL-NEXT: shll $16, %ecx ## encoding: [0xc1,0xe1,0x10]
; KNL-NEXT: orl %eax, %ecx ## encoding: [0x09,0xc1]
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 ## encoding: [0xc5,0xed,0x75,0xc6]
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc3]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 ## encoding: [0xc5,0xe5,0x75,0xc7]
; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd8,0x01]
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc9,0x01]
; KNL-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc0]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
Expand Down
35 changes: 12 additions & 23 deletions llvm/test/CodeGen/X86/avx512-vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -173,31 +173,20 @@ define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) {
;
; CHECK-KNL-LABEL: test8:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: pushq %rbp
; CHECK-KNL-NEXT: .cfi_def_cfa_offset 16
; CHECK-KNL-NEXT: .cfi_offset %rbp, -16
; CHECK-KNL-NEXT: movq %rsp, %rbp
; CHECK-KNL-NEXT: .cfi_def_cfa_register %rbp
; CHECK-KNL-NEXT: andq $-32, %rsp
; CHECK-KNL-NEXT: subq $32, %rsp
; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm8
; CHECK-KNL-NEXT: vmovdqa 16(%rbp), %ymm9
; CHECK-KNL-NEXT: vpxor %xmm10, %xmm10, %xmm10
; CHECK-KNL-NEXT: vpcmpeqb %ymm0, %ymm10, %ymm11
; CHECK-KNL-NEXT: vpmovsxbw %xmm11, %ymm0
; CHECK-KNL-NEXT: vpblendvb %ymm0, %ymm1, %ymm5, %ymm0
; CHECK-KNL-NEXT: vextracti128 $1, %ymm11, %xmm1
; CHECK-KNL-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-KNL-NEXT: vpblendvb %ymm1, %ymm2, %ymm6, %ymm1
; CHECK-KNL-NEXT: vpcmpeqb %ymm10, %ymm8, %ymm5
; CHECK-KNL-NEXT: vpmovsxbw %xmm5, %ymm2
; CHECK-KNL-NEXT: vpblendvb %ymm2, %ymm3, %ymm7, %ymm2
; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; CHECK-KNL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; CHECK-KNL-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm5
; CHECK-KNL-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm0, %ymm6
; CHECK-KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm6, %zmm0
; CHECK-KNL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm5, %ymm1
; CHECK-KNL-NEXT: vextracti128 $1, %ymm5, %xmm3
; CHECK-KNL-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-KNL-NEXT: vpblendvb %ymm3, %ymm4, %ymm9, %ymm3
; CHECK-KNL-NEXT: movq %rbp, %rsp
; CHECK-KNL-NEXT: popq %rbp
; CHECK-KNL-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; CHECK-KNL-NEXT: vpternlogq $202, %zmm4, %zmm2, %zmm1
; CHECK-KNL-NEXT: retq
%c = icmp eq <64 x i8> %x, zeroinitializer
%ret = select <64 x i1> %c, <64 x i16> %a, <64 x i16> %b
Expand Down
114 changes: 57 additions & 57 deletions llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -918,15 +918,15 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
Expand All @@ -951,12 +951,12 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
Expand Down Expand Up @@ -991,8 +991,8 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -5722,15 +5722,15 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
Expand All @@ -5755,12 +5755,12 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
Expand Down Expand Up @@ -5795,8 +5795,8 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -10574,17 +10574,17 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
Expand All @@ -10609,18 +10609,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
Expand Down Expand Up @@ -10697,8 +10697,8 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u,
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
Expand Down Expand Up @@ -15454,19 +15454,19 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
Expand All @@ -15491,15 +15491,15 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -15537,8 +15537,8 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
Expand Down
56 changes: 18 additions & 38 deletions llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -266,20 +266,20 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
;
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm6, %ymm5
; AVX512F-NEXT: vpand %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %ecx
; AVX512F-NEXT: vpmovsxwd %ymm4, %zmm0
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: shll $16, %eax
Expand Down Expand Up @@ -558,38 +558,18 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
;
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm6, %ymm5
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm7
; AVX512F-NEXT: vpand %xmm7, %xmm6, %xmm6
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512F-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %ecx
; AVX512F-NEXT: shll $16, %ecx
; AVX512F-NEXT: orl %eax, %ecx
; AVX512F-NEXT: vpand %xmm4, %xmm5, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %edx
; AVX512F-NEXT: vpmovsxbd %xmm6, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: shll $16, %eax
; AVX512F-NEXT: orl %edx, %eax
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovmskb %ymm0, %ecx
; AVX512F-NEXT: vpmovmskb %ymm1, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: vzeroupper
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -777,13 +777,13 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/bitcast-setcc-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
;
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, %ecx
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %ecx
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: shll $16, %eax
; AVX512F-NEXT: orl %ecx, %eax
Expand Down Expand Up @@ -450,15 +450,15 @@ define void @bitcast_64i8_store(i64* %p, <64 x i8> %a0) {
;
; AVX512F-LABEL: bitcast_64i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm3
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm2
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm3
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
Expand Down Expand Up @@ -520,12 +520,12 @@ define void @bitcast_32i16_store(i32* %p, <32 x i16> %a0) {
;
; AVX512F-LABEL: bitcast_32i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kmovw %k1, 2(%rdi)
Expand Down
56 changes: 10 additions & 46 deletions llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -889,29 +889,11 @@ define void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) {
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512VL-LABEL: test_nt64xi8:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%rdi)
; AVX512VL-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: test_nt64xi8:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: vmovntdq %ymm0, (%rdi)
; AVX512F-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_nt64xi8:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vmovntdq %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_nt64xi8:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovntdq %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
store <64 x i8> %X, <64 x i8>* %ptr, align 64, !nontemporal !1
ret void
Expand All @@ -933,29 +915,11 @@ define void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) {
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512VL-LABEL: test_nt32xi16:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%rdi)
; AVX512VL-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: test_nt32xi16:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: vmovntdq %ymm0, (%rdi)
; AVX512F-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_nt32xi16:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vmovntdq %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_nt32xi16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovntdq %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
store <32 x i16> %X, <32 x i16>* %ptr, align 64, !nontemporal !1
ret void
Expand Down
136 changes: 68 additions & 68 deletions llvm/test/CodeGen/X86/kshift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,23 @@ define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; KNL-LABEL: kshiftl_v32i1_1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; KNL-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm3
; KNL-NEXT: vpmovsxwd %ymm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k1
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[15],zmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[15],zmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: kshiftlw $1, %k2, %k1
; KNL-NEXT: vpcmpeqw %ymm4, %ymm2, %ymm2
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
; KNL-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm1
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %ecx
Expand Down Expand Up @@ -107,16 +107,15 @@ define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftl_v64i1_1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm5
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm0
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k1
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k3
Expand All @@ -125,25 +124,26 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
; KNL-NEXT: valignd {{.*#+}} zmm3 = zmm0[15],zmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm5[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm5 = zmm6[15],zmm5[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm4[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm4 = zmm5[15],zmm4[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; KNL-NEXT: kshiftlw $1, %k1, %k3
; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm6
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; KNL-NEXT: vpcmpeqb %ymm2, %ymm5, %ymm5
; KNL-NEXT: vextracti128 $1, %ymm5, %xmm6
; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm1
; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k4
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k3}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k0 {%k4}
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 {%k4}
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
Expand Down Expand Up @@ -233,13 +233,13 @@ define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
; KNL-LABEL: kshiftl_v32i1_31:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
Expand All @@ -266,13 +266,13 @@ define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftl_v64i1_63:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
Expand Down Expand Up @@ -358,23 +358,23 @@ define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; KNL-LABEL: kshiftr_v32i1_1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; KNL-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm3
; KNL-NEXT: vpmovsxwd %ymm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k1
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm3[0]
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm2[0]
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k2
; KNL-NEXT: vpcmpeqw %ymm4, %ymm2, %ymm1
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %ecx
Expand Down Expand Up @@ -404,36 +404,36 @@ define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftr_v64i1_1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm5
; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm5
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm3
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4
; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k2
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k3
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k4
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
; KNL-NEXT: valignd {{.*#+}} zmm5 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm3[0]
; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm6[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0]
; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm3 = zmm3[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm6[0]
; KNL-NEXT: valignd {{.*#+}} zmm4 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm3[0]
; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm5[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0]
; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
; KNL-NEXT: valignd {{.*#+}} zmm3 = zmm3[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm5[0]
; KNL-NEXT: kshiftrw $1, %k1, %k3
; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm1
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm6
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm5
; KNL-NEXT: vextracti128 $1, %ymm5, %xmm6
; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k2
; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm1
; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k4
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
Expand All @@ -447,7 +447,7 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; KNL-NEXT: shlq $32, %rcx
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k0 {%k1}
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: shll $16, %eax
; KNL-NEXT: orl %edx, %eax
Expand Down
16 changes: 10 additions & 6 deletions llvm/test/CodeGen/X86/madd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2253,12 +2253,16 @@ define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) {
;
; AVX512F-LABEL: jumbled_indices32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmaddwd %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmaddwd %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmaddwd %ymm7, %ymm3, %ymm1
; AVX512F-NEXT: vpmaddwd %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpmaddwd %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT: vpmaddwd %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpmaddwd %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: jumbled_indices32:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/masked_store_trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5373,14 +5373,14 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
;
; AVX512F-LABEL: truncstore_v32i16_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX512F-NEXT: vpmovmskb %ymm1, %eax
; AVX512F-NEXT: notl %eax
; AVX512F-NEXT: testb $1, %al
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6140,9 +6140,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
;
; AVX512F-LABEL: truncstore_v32i16_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpminuw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpminuw %ymm3, %ymm0, %ymm0
Expand Down
60 changes: 16 additions & 44 deletions llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -460,22 +460,15 @@ define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable
}

define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT: retq
; ALL-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
Expand Down Expand Up @@ -515,22 +508,15 @@ define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) n
}

define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT: retq
; ALL-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
Expand All @@ -547,22 +533,15 @@ define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) n
}

define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT: retq
; ALL-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
Expand All @@ -585,22 +564,15 @@ define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
}

define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT: retq
; ALL-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; ALL: # %bb.0:
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
Expand Down
524 changes: 262 additions & 262 deletions llvm/test/CodeGen/X86/midpoint-int-vec-512.ll

Large diffs are not rendered by default.

104 changes: 52 additions & 52 deletions llvm/test/CodeGen/X86/movmsk-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -543,13 +543,13 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
;
; KNL-LABEL: allones_v32i16_sign:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
Expand Down Expand Up @@ -611,13 +611,13 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
;
; KNL-LABEL: allzeros_v32i16_sign:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
Expand Down Expand Up @@ -1381,12 +1381,12 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
;
; KNL-LABEL: allones_v64i8_and1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: vpmovmskb %ymm1, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
Expand Down Expand Up @@ -1463,12 +1463,12 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
;
; KNL-LABEL: allzeros_v64i8_and1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: vpmovmskb %ymm1, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
Expand Down Expand Up @@ -1686,13 +1686,13 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
;
; KNL-LABEL: allones_v32i16_and1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -1766,13 +1766,13 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
;
; KNL-LABEL: allzeros_v32i16_and1:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -2784,12 +2784,12 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
;
; KNL-LABEL: allones_v64i8_and4:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $5, %ymm0, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: vpmovmskb %ymm1, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
Expand Down Expand Up @@ -2866,12 +2866,12 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
;
; KNL-LABEL: allzeros_v64i8_and4:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $5, %ymm0, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: vpmovmskb %ymm1, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
Expand Down Expand Up @@ -3089,13 +3089,13 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
;
; KNL-LABEL: allones_v32i16_and4:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down Expand Up @@ -3169,13 +3169,13 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
;
; KNL-LABEL: allzeros_v32i16_and4:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Expand Down
182 changes: 64 additions & 118 deletions llvm/test/CodeGen/X86/nontemporal-loads-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -921,44 +921,24 @@ define <32 x i16> @test_v32i16_align16(<32 x i16>* %src) nounwind {
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512DQ-LABEL: test_v32i16_align16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: pushq %rbp
; AVX512DQ-NEXT: movq %rsp, %rbp
; AVX512DQ-NEXT: andq $-32, %rsp
; AVX512DQ-NEXT: subq $96, %rsp
; AVX512DQ-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512DQ-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovaps (%rsp), %ymm0
; AVX512DQ-NEXT: vinsertf64x4 $1, {{[0-9]+}}(%rsp), %zmm0, %zmm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_v32i16_align16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512BW-NEXT: vmovaps (%rsp), %zmm0
; AVX512BW-NEXT: movq %rbp, %rsp
; AVX512BW-NEXT: popq %rbp
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_v32i16_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-64, %rsp
; AVX512-NEXT: subq $128, %rsp
; AVX512-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512-NEXT: vmovaps (%rsp), %zmm0
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%1 = load <32 x i16>, <32 x i16>* %src, align 16, !nontemporal !1
ret <32 x i16> %1
}
Expand Down Expand Up @@ -1020,44 +1000,24 @@ define <64 x i8> @test_v64i8_align16(<64 x i8>* %src) nounwind {
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512DQ-LABEL: test_v64i8_align16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: pushq %rbp
; AVX512DQ-NEXT: movq %rsp, %rbp
; AVX512DQ-NEXT: andq $-32, %rsp
; AVX512DQ-NEXT: subq $96, %rsp
; AVX512DQ-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512DQ-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: vmovaps (%rsp), %ymm0
; AVX512DQ-NEXT: vinsertf64x4 $1, {{[0-9]+}}(%rsp), %zmm0, %zmm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_v64i8_align16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512BW-NEXT: vmovaps (%rsp), %zmm0
; AVX512BW-NEXT: movq %rbp, %rsp
; AVX512BW-NEXT: popq %rbp
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_v64i8_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-64, %rsp
; AVX512-NEXT: subq $128, %rsp
; AVX512-NEXT: vmovntdqa 48(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa 32(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa (%rdi), %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rsp)
; AVX512-NEXT: vmovaps (%rsp), %zmm0
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %src, align 16, !nontemporal !1
ret <64 x i8> %1
}
Expand Down Expand Up @@ -1299,27 +1259,20 @@ define <32 x i16> @test_v32i16_align32(<32 x i16>* %src) nounwind {
; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX2-NEXT: retq
;
; AVX512DQ-LABEL: test_v32i16_align32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_v32i16_align32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: vmovntdqa 32(%rdi), %ymm0
; AVX512BW-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512BW-NEXT: vmovdqa %ymm0, (%rsp)
; AVX512BW-NEXT: vmovaps (%rsp), %zmm0
; AVX512BW-NEXT: movq %rbp, %rsp
; AVX512BW-NEXT: popq %rbp
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_v32i16_align32:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-64, %rsp
; AVX512-NEXT: subq $128, %rsp
; AVX512-NEXT: vmovntdqa 32(%rdi), %ymm0
; AVX512-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512-NEXT: vmovdqa %ymm0, (%rsp)
; AVX512-NEXT: vmovaps (%rsp), %zmm0
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%1 = load <32 x i16>, <32 x i16>* %src, align 32, !nontemporal !1
ret <32 x i16> %1
}
Expand Down Expand Up @@ -1357,27 +1310,20 @@ define <64 x i8> @test_v64i8_align32(<64 x i8>* %src) nounwind {
; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX2-NEXT: retq
;
; AVX512DQ-LABEL: test_v64i8_align32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_v64i8_align32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: vmovntdqa 32(%rdi), %ymm0
; AVX512BW-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512BW-NEXT: vmovdqa %ymm0, (%rsp)
; AVX512BW-NEXT: vmovaps (%rsp), %zmm0
; AVX512BW-NEXT: movq %rbp, %rsp
; AVX512BW-NEXT: popq %rbp
; AVX512BW-NEXT: retq
; AVX512-LABEL: test_v64i8_align32:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-64, %rsp
; AVX512-NEXT: subq $128, %rsp
; AVX512-NEXT: vmovntdqa 32(%rdi), %ymm0
; AVX512-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovntdqa (%rdi), %ymm0
; AVX512-NEXT: vmovdqa %ymm0, (%rsp)
; AVX512-NEXT: vmovaps (%rsp), %zmm0
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %src, align 32, !nontemporal !1
ret <64 x i8> %1
}
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/nontemporal-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1275,10 +1275,10 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
;
; AVX512F-LABEL: test_arg_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: vmovntdqa (%rdi), %ymm2
; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm3
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
Expand All @@ -1291,10 +1291,10 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
;
; AVX512VL-LABEL: test_arg_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm2
; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm3
; AVX512VL-NEXT: vpaddw %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
Expand Down Expand Up @@ -1350,10 +1350,10 @@ define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
;
; AVX512F-LABEL: test_arg_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: vmovntdqa (%rdi), %ymm2
; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm3
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
Expand All @@ -1366,10 +1366,10 @@ define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
;
; AVX512VL-LABEL: test_arg_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm2
; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm3
; AVX512VL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
Expand Down
Loading