Skip to content

Commit 58c61dc

Browse files
committed
[X86] Add test case for D56283.
[X86] Add test case for D56283. This tests a case where we need to be able to compute sign bits for two insert_subvectors whose result is a live-out of a basic block. The result is then used as a boolean vector in another basic block. llvm-svn: 350359
1 parent 6b8a9db commit 58c61dc

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

llvm/test/CodeGen/X86/known-signbits-vector.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,3 +385,69 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
385385
%6 = sitofp <4 x i64> %5 to <4 x float>
386386
ret <4 x float> %6
387387
}
388+
389+
; Make sure we can preserve sign bit information into the second basic block
390+
; so we can avoid having to shift bit 0 into bit 7 for each element due to
391+
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
392+
; ComputeNumSignBits handling for insert_subvector.
393+
define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 x i8> %z) {
394+
; X32-LABEL: cross_bb_signbits_insert_subvec:
395+
; X32: # %bb.0:
396+
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
397+
; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
398+
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
399+
; X32-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
400+
; X32-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
401+
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
402+
; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
403+
; X32-NEXT: vpsllw $7, %xmm3, %xmm3
404+
; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
405+
; X32-NEXT: vpand %xmm4, %xmm3, %xmm3
406+
; X32-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
407+
; X32-NEXT: vpsllw $7, %xmm0, %xmm0
408+
; X32-NEXT: vpand %xmm4, %xmm0, %xmm0
409+
; X32-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
410+
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
411+
; X32-NEXT: vandnps %ymm1, %ymm0, %ymm1
412+
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
413+
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
414+
; X32-NEXT: vmovaps %ymm0, (%eax)
415+
; X32-NEXT: vzeroupper
416+
; X32-NEXT: retl
417+
;
418+
; X64-LABEL: cross_bb_signbits_insert_subvec:
419+
; X64: # %bb.0:
420+
; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
421+
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
422+
; X64-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
423+
; X64-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
424+
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
425+
; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
426+
; X64-NEXT: vpsllw $7, %xmm3, %xmm3
427+
; X64-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
428+
; X64-NEXT: vpand %xmm4, %xmm3, %xmm3
429+
; X64-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
430+
; X64-NEXT: vpsllw $7, %xmm0, %xmm0
431+
; X64-NEXT: vpand %xmm4, %xmm0, %xmm0
432+
; X64-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
433+
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
434+
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm1
435+
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
436+
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
437+
; X64-NEXT: vmovaps %ymm0, (%rdi)
438+
; X64-NEXT: vzeroupper
439+
; X64-NEXT: retq
440+
%a = icmp eq <32 x i8> %x, zeroinitializer
441+
%b = icmp eq <32 x i8> %x, zeroinitializer
442+
%c = and <32 x i1> %a, %b
443+
br label %block
444+
445+
block:
446+
%d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
447+
store <32 x i8> %d, <32 x i8>* %ptr, align 32
448+
br label %exit
449+
450+
exit:
451+
ret void
452+
}
453+

0 commit comments

Comments (0)