@@ -385,3 +385,69 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
   %6 = sitofp <4 x i64> %5 to <4 x float>
   ret <4 x float> %6
 }
+
+; Make sure we can preserve sign bit information into the second basic block
+; so we can avoid having to shift bit 0 into bit 7 for each element due to
+; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
+; ComputeNumSignBits handling for insert_subvector.
+define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 x i8> %z) {
+; X32-LABEL: cross_bb_signbits_insert_subvec:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X32-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
+; X32-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X32-NEXT:    vpsllw $7, %xmm3, %xmm3
+; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X32-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X32-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm3
+; X32-NEXT:    vpsllw $7, %xmm0, %xmm0
+; X32-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; X32-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-NEXT:    vandnps %ymm1, %ymm0, %ymm1
+; X32-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: cross_bb_signbits_insert_subvec:
+; X64:       # %bb.0:
+; X64-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
+; X64-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-NEXT:    vpsllw $7, %xmm3, %xmm3
+; X64-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X64-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm3
+; X64-NEXT:    vpsllw $7, %xmm0, %xmm0
+; X64-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; X64-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm1
+; X64-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X64-NEXT:    vmovaps %ymm0, (%rdi)
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %a = icmp eq <32 x i8> %x, zeroinitializer
+  %b = icmp eq <32 x i8> %x, zeroinitializer
+  %c = and <32 x i1> %a, %b
+  br label %block
+
+block:
+  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
+  store <32 x i8> %d, <32 x i8>* %ptr, align 32
+  br label %exit
+
+exit:
+  ret void
+}
+
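The test comment above refers to ComputeNumSignBits handling for insert_subvector. As a rough illustration only, here is a minimal sketch of what such a case could look like inside the opcode switch of SelectionDAG::ComputeNumSignBits, assuming the conservative approach of taking the minimum sign-bit count over the base vector and the inserted subvector; the actual upstream implementation may track demanded elements and differ in detail:

    case ISD::INSERT_SUBVECTOR: {
      // Sketch only, not the verbatim upstream code. Every element of the
      // result comes from either the inserted subvector (operand 1) or the
      // base vector (operand 0), so the sign-bit count known to hold for
      // both inputs is safe to report for the result.
      unsigned SubBits = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
      if (SubBits == 1)
        return 1; // Nothing beyond the sign bit itself is known.
      unsigned SrcBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      return std::min(SubBits, SrcBits);
    }

With that knowledge preserved across the basic-block boundary, the all-ones/all-zeros masks produced by the vpcmpeqb compares would no longer need the per-half vpsllw $7 / vpand / vpcmpgtb re-signing sequence shown in the CHECK lines above.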