Skip to content

Commit

Permalink
[DAGCombiner] Don't emit select_cc from visitSINT_TO_FP/visitUINT_TO_FP. Use plain select instead.
Browse files Browse the repository at this point in the history

Select_cc isn't used by all targets. X86 doesn't have optimizations
for it.

Since we already know the input to the sint_to_fp/uint_to_fp is
a setcc, we can just emit a plain select using that setcc as the
condition. Other DAG combines can turn that into a select_cc on
targets that support it.

Differential Revision: https://reviews.llvm.org/D75415
  • Loading branch information
topperc committed Mar 1, 2020
1 parent 66128c4 commit 211fb91
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 79 deletions.
62 changes: 23 additions & 39 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -13157,33 +13157,24 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}

// The next optimizations are desirable only if SELECT_CC can be lowered.
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}

// fold (sint_to_fp (zext (setcc x, y, cc))) ->
// (select_cc x, y, 1.0, 0.0, cc)
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(0).getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
// (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0.getOperand(0),
DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}

if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
Expand Down Expand Up @@ -13217,19 +13208,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}

// The next optimizations are desirable only if SELECT_CC can be lowered.
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}

if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
Expand Down
22 changes: 6 additions & 16 deletions llvm/test/CodeGen/X86/cmovcmov.ll
Expand Up @@ -216,14 +216,9 @@ entry:
define float @test_zext_fcmp_une(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_une:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CMOV-NEXT: jne .LBB5_3
; CMOV-NEXT: # %bb.1: # %entry
; CMOV-NEXT: jp .LBB5_3
; CMOV-NEXT: # %bb.2: # %entry
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: .LBB5_3: # %entry
; CMOV-NEXT: cmpneqss %xmm1, %xmm0
; CMOV-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CMOV-NEXT: andps %xmm1, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_une:
Expand Down Expand Up @@ -259,14 +254,9 @@ entry:
define float @test_zext_fcmp_oeq(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_oeq:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: jne .LBB6_3
; CMOV-NEXT: # %bb.1: # %entry
; CMOV-NEXT: jp .LBB6_3
; CMOV-NEXT: # %bb.2: # %entry
; CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CMOV-NEXT: .LBB6_3: # %entry
; CMOV-NEXT: cmpeqss %xmm1, %xmm0
; CMOV-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CMOV-NEXT: andps %xmm1, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_oeq:
Expand Down
36 changes: 12 additions & 24 deletions llvm/test/CodeGen/X86/pr44749.ll
Expand Up @@ -4,8 +4,8 @@
define i32 @a() {
; CHECK-LABEL: a:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: xorl %eax, %eax
Expand All @@ -16,31 +16,19 @@ define i32 @a() {
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx
; CHECK-NEXT: subq $-1, %rcx
; CHECK-NEXT: setne %dl
; CHECK-NEXT: movzbl %dl, %eax
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movsd %xmm1, (%rsp) ## 8-byte Spill
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.1: ## %entry
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload
; CHECK-NEXT: ## xmm0 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, (%rsp) ## 8-byte Spill
; CHECK-NEXT: LBB0_2: ## %entry
; CHECK-NEXT: movsd (%rsp), %xmm0 ## 8-byte Reload
; CHECK-NEXT: ## xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: addsd %xmm1, %xmm0
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 8-byte Reload
; CHECK-NEXT: ## xmm1 = mem[0],zero
; CHECK-NEXT: ucomisd %xmm0, %xmm1
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %ecx
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
; CHECK-NEXT: ucomisd %xmm1, %xmm0
; CHECK-NEXT: setae %dl
; CHECK-NEXT: movzbl %dl, %eax
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
entry:
%call = call i32 (...) @b()
Expand Down

0 comments on commit 211fb91

Please sign in to comment.