Skip to content

Commit

Permalink
[X86][SSE] Improve recognition of uitofp conversions that can be perf…
Browse files Browse the repository at this point in the history
…ormed as sitofp

With D24253 we can now use SelectionDAG::SignBitIsZero with vector operations.

This patch uses SelectionDAG::SignBitIsZero to recognise that a zero sign bit means that we can use a sitofp instead of a uitofp (which is not directly support on pre-AVX512 hardware).

While AVX512 does provide support for uitofp, the conversion to sitofp should not cause any regressions.

Differential Revision: https://reviews.llvm.org/D24343

llvm-svn: 281852
  • Loading branch information
RKSimon committed Sep 18, 2016
1 parent a1a0e7d commit 6c21e6a
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 495 deletions.
4 changes: 0 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1980,10 +1980,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
// This predicate is not safe for vector operations.
if (Op.getValueType().isVector())
return false;

unsigned BitWidth = Op.getScalarValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
}
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13871,15 +13871,15 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());

if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);

// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);

if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);

MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();

Expand Down Expand Up @@ -31204,6 +31204,12 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}

// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(Op0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);

return SDValue();
}

Expand Down
19 changes: 6 additions & 13 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ define <16 x float> @uitofp_16i8(<16 x i8>%a) {
; ALL-LABEL: uitofp_16i8:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
%b = uitofp <16 x i8> %a to <16 x float>
ret <16 x float>%b
Expand All @@ -934,7 +934,7 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) {
; ALL-LABEL: uitofp_16i16:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
%b = uitofp <16 x i16> %a to <16 x float>
ret <16 x float>%b
Expand Down Expand Up @@ -1036,9 +1036,8 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: uitofp_4i1_float:
Expand All @@ -1059,8 +1058,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: uitofp_4i1_double:
Expand Down Expand Up @@ -1113,12 +1111,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: uitofp_2i1_double:
Expand Down
Loading

0 comments on commit 6c21e6a

Please sign in to comment.