@@ -83,12 +83,12 @@ define i8 @concat_icmp_v8i32_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
8383;
8484; AVX512-LABEL: concat_icmp_v8i32_v4i32:
8585; AVX512: # %bb.0:
86- ; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k0
87- ; AVX512-NEXT: vptestnmd %xmm1, %xmm1, %k1
88- ; AVX512-NEXT: kshiftlb $4, %k1, %k1
89- ; AVX512-NEXT: korb %k1, %k0, %k0
86+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
87+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
88+ ; AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k0
9089; AVX512-NEXT: kmovd %k0, %eax
9190; AVX512-NEXT: # kill: def $al killed $al killed $eax
91+ ; AVX512-NEXT: vzeroupper
9292; AVX512-NEXT: retq
9393 %v0 = icmp eq <4 x i32 > %a0 , zeroinitializer
9494 %v1 = icmp eq <4 x i32 > %a1 , zeroinitializer
@@ -151,12 +151,12 @@ define i16 @concat_icmp_v16i16_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
151151;
152152; AVX512-LABEL: concat_icmp_v16i16_v8i16:
153153; AVX512: # %bb.0:
154- ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
155- ; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm0, %k0
156- ; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm1, %k1
157- ; AVX512-NEXT: kunpckbw %k0, %k1, %k0
154+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
155+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
156+ ; AVX512-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
158157; AVX512-NEXT: kmovd %k0, %eax
159158; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
159+ ; AVX512-NEXT: vzeroupper
160160; AVX512-NEXT: retq
161161 %v0 = icmp ugt <8 x i16 > %a0 , splat (i16 1 )
162162 %v1 = icmp ugt <8 x i16 > %a1 , splat (i16 1 )
@@ -199,11 +199,11 @@ define i32 @concat_icmp_v32i8_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
199199;
200200; AVX512-LABEL: concat_icmp_v32i8_v16i8:
201201; AVX512: # %bb.0:
202- ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
203- ; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %k0
204- ; AVX512-NEXT: vpcmpgtb %xmm2, %xmm1, %k1
205- ; AVX512-NEXT: kunpckwd %k0, %k1, %k0
202+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
203+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
204+ ; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
206205; AVX512-NEXT: kmovd %k0, %eax
206+ ; AVX512-NEXT: vzeroupper
207207; AVX512-NEXT: retq
208208 %v0 = icmp sgt <16 x i8 > %a0 , splat (i8 5 )
209209 %v1 = icmp sgt <16 x i8 > %a1 , splat (i8 5 )
@@ -329,21 +329,15 @@ define i8 @concat_icmp_v8i64_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2,
329329;
330330; AVX512-LABEL: concat_icmp_v8i64_v2i64:
331331; AVX512: # %bb.0:
332- ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm4 = [128,128]
333- ; AVX512-NEXT: vpcmpltuq %xmm4, %xmm0, %k0
334- ; AVX512-NEXT: vpcmpltuq %xmm4, %xmm1, %k1
335- ; AVX512-NEXT: vpcmpltuq %xmm4, %xmm2, %k2
336- ; AVX512-NEXT: vpcmpltuq %xmm4, %xmm3, %k3
337- ; AVX512-NEXT: kshiftlb $2, %k3, %k3
338- ; AVX512-NEXT: korb %k3, %k2, %k2
339- ; AVX512-NEXT: kshiftlb $4, %k2, %k2
340- ; AVX512-NEXT: kshiftlb $2, %k1, %k1
341- ; AVX512-NEXT: korw %k1, %k0, %k0
342- ; AVX512-NEXT: kshiftlb $4, %k0, %k0
343- ; AVX512-NEXT: kshiftrb $4, %k0, %k0
344- ; AVX512-NEXT: korb %k2, %k0, %k0
332+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
333+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
334+ ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
335+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
336+ ; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
337+ ; AVX512-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
345338; AVX512-NEXT: kmovd %k0, %eax
346339; AVX512-NEXT: # kill: def $al killed $al killed $eax
340+ ; AVX512-NEXT: vzeroupper
347341; AVX512-NEXT: retq
348342 %v0 = icmp ult <2 x i64 > %a0 , splat (i64 128 )
349343 %v1 = icmp ult <2 x i64 > %a1 , splat (i64 128 )
@@ -387,18 +381,16 @@ define i16 @concat_icmp_v16i32_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
387381;
388382; AVX512-LABEL: concat_icmp_v16i32_v4i32:
389383; AVX512: # %bb.0:
390- ; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
391- ; AVX512-NEXT: vpcmpgtd %xmm4, %xmm0, %k0
392- ; AVX512-NEXT: vpcmpgtd %xmm4, %xmm1, %k1
393- ; AVX512-NEXT: vpcmpgtd %xmm4, %xmm2, %k2
394- ; AVX512-NEXT: vpcmpgtd %xmm4, %xmm3, %k3
395- ; AVX512-NEXT: kshiftlb $4, %k1, %k1
396- ; AVX512-NEXT: korb %k1, %k0, %k0
397- ; AVX512-NEXT: kshiftlb $4, %k3, %k1
398- ; AVX512-NEXT: korb %k1, %k2, %k1
399- ; AVX512-NEXT: kunpckbw %k0, %k1, %k0
384+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
385+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
386+ ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
387+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
388+ ; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
389+ ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
390+ ; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
400391; AVX512-NEXT: kmovd %k0, %eax
401392; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
393+ ; AVX512-NEXT: vzeroupper
402394; AVX512-NEXT: retq
403395 %v0 = icmp sgt <4 x i32 > %a0 , zeroinitializer
404396 %v1 = icmp sgt <4 x i32 > %a1 , zeroinitializer
@@ -468,14 +460,14 @@ define i32 @concat_icmp_v32i16_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2
468460;
469461; AVX512-LABEL: concat_icmp_v32i16_v8i16:
470462; AVX512: # %bb.0:
471- ; AVX512-NEXT: vptestmw %xmm0, %xmm0, %k0
472- ; AVX512-NEXT: vptestmw %xmm1, %xmm1, %k1
473- ; AVX512-NEXT: vptestmw %xmm2, %xmm2, %k2
474- ; AVX512-NEXT: vptestmw %xmm3, %xmm3, %k3
475- ; AVX512-NEXT: kunpckbw %k0, %k1, %k0
476- ; AVX512-NEXT: kunpckbw %k2, %k3, %k1
477- ; AVX512-NEXT: kunpckwd %k0, %k1, %k0
463+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
464+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
465+ ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
466+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
467+ ; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
468+ ; AVX512-NEXT: vptestmw %zmm0, %zmm0, %k0
478469; AVX512-NEXT: kmovd %k0, %eax
470+ ; AVX512-NEXT: vzeroupper
479471; AVX512-NEXT: retq
480472 %v0 = icmp ne <8 x i16 > %a0 , zeroinitializer
481473 %v1 = icmp ne <8 x i16 > %a1 , zeroinitializer
@@ -560,15 +552,14 @@ define i64 @concat_icmp_v64i8_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2,
560552;
561553; AVX512-LABEL: concat_icmp_v64i8_v16i8:
562554; AVX512: # %bb.0:
563- ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
564- ; AVX512-NEXT: vpcmpnleub %xmm4, %xmm0, %k0
565- ; AVX512-NEXT: vpcmpnleub %xmm4, %xmm1, %k1
566- ; AVX512-NEXT: vpcmpnleub %xmm4, %xmm2, %k2
567- ; AVX512-NEXT: vpcmpnleub %xmm4, %xmm3, %k3
568- ; AVX512-NEXT: kunpckwd %k0, %k1, %k0
569- ; AVX512-NEXT: kunpckwd %k2, %k3, %k1
570- ; AVX512-NEXT: kunpckdq %k0, %k1, %k0
555+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
556+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
557+ ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
558+ ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
559+ ; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
560+ ; AVX512-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
571561; AVX512-NEXT: kmovq %k0, %rax
562+ ; AVX512-NEXT: vzeroupper
572563; AVX512-NEXT: retq
573564 %v0 = icmp ugt <16 x i8 > %a0 , splat (i8 15 )
574565 %v1 = icmp ugt <16 x i8 > %a1 , splat (i8 15 )
@@ -672,10 +663,9 @@ define i8 @concat_icmp_v8i64_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
672663;
673664; AVX512-LABEL: concat_icmp_v8i64_v4i64:
674665; AVX512: # %bb.0:
675- ; AVX512-NEXT: vptestnmq %ymm0, %ymm0, %k0
676- ; AVX512-NEXT: vptestnmq %ymm1, %ymm1, %k1
677- ; AVX512-NEXT: kshiftlb $4, %k1, %k1
678- ; AVX512-NEXT: korb %k1, %k0, %k0
666+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
667+ ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
668+ ; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0
679669; AVX512-NEXT: kmovd %k0, %eax
680670; AVX512-NEXT: # kill: def $al killed $al killed $eax
681671; AVX512-NEXT: vzeroupper
@@ -768,10 +758,9 @@ define i16 @concat_icmp_v16i32_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
768758;
769759; AVX512-LABEL: concat_icmp_v16i32_v8i32:
770760; AVX512: # %bb.0:
771- ; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
772- ; AVX512-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
773- ; AVX512-NEXT: vpcmpnleud %ymm2, %ymm1, %k1
774- ; AVX512-NEXT: kunpckbw %k0, %k1, %k0
761+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
762+ ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
763+ ; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
775764; AVX512-NEXT: kmovd %k0, %eax
776765; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
777766; AVX512-NEXT: vzeroupper
@@ -830,10 +819,9 @@ define i32 @concat_icmp_v32i16_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
830819;
831820; AVX512-LABEL: concat_icmp_v32i16_v16i16:
832821; AVX512: # %bb.0:
833- ; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
834- ; AVX512-NEXT: vpcmpgtw %ymm2, %ymm0, %k0
835- ; AVX512-NEXT: vpcmpgtw %ymm2, %ymm1, %k1
836- ; AVX512-NEXT: kunpckwd %k0, %k1, %k0
822+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
823+ ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
824+ ; AVX512-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
837825; AVX512-NEXT: kmovd %k0, %eax
838826; AVX512-NEXT: vzeroupper
839827; AVX512-NEXT: retq
@@ -903,10 +891,9 @@ define i64 @concat_icmp_v64i8_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
903891;
904892; AVX512-LABEL: concat_icmp_v64i8_v32i8:
905893; AVX512: # %bb.0:
906- ; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
907- ; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %k0
908- ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm2, %k1
909- ; AVX512-NEXT: kunpckdq %k0, %k1, %k0
894+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
895+ ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
896+ ; AVX512-NEXT: vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
910897; AVX512-NEXT: kmovq %k0, %rax
911898; AVX512-NEXT: vzeroupper
912899; AVX512-NEXT: retq
0 commit comments