Skip to content

Commit

Permalink
[X86][SSE] Improve SINT_TO_FP of boolean vector results (signum)
Browse files Browse the repository at this point in the history
This patch helps avoid poor legalization of boolean vector results (e.g. 8f32 -> 8i1 -> 8i16) that feed into SINT_TO_FP by inserting an early SIGN_EXTEND, and so helps improve the truncation logic.

This is not necessary for AVX512 targets where boolean vectors are legal - AVX512 manages to lower (sint_to_fp vXi1) into some form of (select mask, 1.0f, 0.0f) in most cases.

Fix for PR13248

Differential Revision: https://reviews.llvm.org/D26583

llvm-svn: 286979
  • Loading branch information
RKSimon committed Nov 15, 2016
1 parent bb238bb commit ceffb43
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 109 deletions.
5 changes: 4 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -31752,9 +31752,12 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();

// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
if (InVT.isVector() &&
(InSVT == MVT::i8 || InSVT == MVT::i16 ||
(InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
Expand Down
19 changes: 2 additions & 17 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
Expand Up @@ -836,8 +836,6 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; KNL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
; KNL-NEXT: retq
;
Expand All @@ -860,21 +858,8 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; KNL-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: xorl %ecx, %ecx
; KNL-NEXT: testb $1, %al
; KNL-NEXT: movl $-1, %eax
; KNL-NEXT: movl $0, %edx
; KNL-NEXT: cmovnel %eax, %edx
; KNL-NEXT: vcvtsi2ssl %edx, %xmm2, %xmm1
; KNL-NEXT: vmovq %xmm0, %rdx
; KNL-NEXT: testb $1, %dl
; KNL-NEXT: cmovnel %eax, %ecx
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sitofp_2i1_float:
Expand Down
104 changes: 13 additions & 91 deletions llvm/test/CodeGen/X86/sse-fsignum.ll
Expand Up @@ -33,59 +33,19 @@ entry:
}

define void @signum64a(<2 x double>*) {
; AVX1-LABEL: signum64a:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovapd (%rdi), %xmm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: vmovq %xmm2, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm2
; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: signum64a:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovapd (%rdi), %xmm0
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: vmovq %xmm2, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm2
; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: signum64a:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovapd (%rdi), %xmm0
; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, (%rdi)
; AVX512F-NEXT: retq
; AVX-LABEL: signum64a:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovapd %xmm0, (%rdi)
; AVX-NEXT: retq
entry:
%1 = load <2 x double>, <2 x double>* %0
%2 = fcmp olt <2 x double> %1, zeroinitializer
Expand All @@ -107,24 +67,8 @@ define void @signum32b(<8 x float>*) {
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vmovaps %ymm0, (%rdi)
Expand All @@ -136,18 +80,8 @@ define void @signum32b(<8 x float>*) {
; AVX2-NEXT: vmovaps (%rdi), %ymm0
; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX2-NEXT: vpsraw $15, %xmm2, %xmm2
; AVX2-NEXT: vpmovsxwd %xmm2, %ymm2
; AVX2-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovaps %ymm0, (%rdi)
Expand Down Expand Up @@ -189,14 +123,10 @@ define void @signum64b(<4 x double>*) {
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vmovapd %ymm0, (%rdi)
Expand All @@ -210,14 +140,10 @@ define void @signum64b(<4 x double>*) {
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovapd %ymm0, (%rdi)
Expand All @@ -230,13 +156,9 @@ define void @signum64b(<4 x double>*) {
; AVX512F-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
; AVX512F-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512F-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovapd %ymm0, (%rdi)
Expand Down

0 comments on commit ceffb43

Please sign in to comment.