[X86] Change legalization action for f128 fadd/fsub/fmul/fdiv from Custom to LibCall.

The custom code just emits a libcall, but the generic legalization code can do
the same. The only difference is that the generic code can form tail calls
where the custom code couldn't; that accounts for the test changes.

This avoids needing to modify the Custom handling for strict fp.
topperc committed Nov 21, 2019
1 parent fea8288 commit 1439059
Showing 5 changed files with 37 additions and 59 deletions.
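At the source level, the effect is easiest to see on a function whose f128 arithmetic is the last thing it does. A hedged illustration in C++ (the function below is hypothetical, using the GCC/Clang __float128 extension on x86-64; it is not part of the commit):

// With the old Custom lowering, the add compiled to "callq __addtf3" followed
// by "retq". With the generic LibCall legalization, the libcall is recognized
// as being in tail position and becomes "jmp __addtf3".
__float128 add_f128(__float128 a, __float128 b) {
  return a + b;   // fadd fp128 -> runtime call to __addtf3
}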
16 changes: 4 additions & 12 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -657,10 +657,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps

-setOperationAction(ISD::FADD, MVT::f128, Custom);
-setOperationAction(ISD::FSUB, MVT::f128, Custom);
-setOperationAction(ISD::FDIV, MVT::f128, Custom);
-setOperationAction(ISD::FMUL, MVT::f128, Custom);
+setOperationAction(ISD::FADD, MVT::f128, LibCall);
+setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+setOperationAction(ISD::FDIV, MVT::f128, LibCall);
+setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);

setOperationAction(ISD::FABS, MVT::f128, Custom);
@@ -19792,12 +19792,6 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
-  if (Op.getValueType() == MVT::f128) {
-    RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
-                                                    : RTLIB::SUB_F128;
-    return LowerF128Call(Op, DAG, LC);
-  }
-
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
@@ -27797,8 +27791,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
case ISD::FSUB: return lowerFaddFsub(Op, DAG);
-case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
-case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
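For reference, the Custom path being bypassed looked roughly like the sketch below (approximated from the LowerF128Call helper of this era, which remains in the tree for other f128 operations). makeLibCall builds a plain call and never marks it as a tail call, whereas the generic ExpandLibCall path checks isInTailCallPosition and can emit one:

// Approximate sketch, not the verbatim helper: wraps the node's operands into
// a libcall and returns the call result; the call is always an ordinary call.
SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                         RTLIB::Libcall Call) const {
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
}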
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/fp128-extract.ll
@@ -9,15 +9,14 @@ define fp128 @TestExtract(<2 x double> %x) nounwind {
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq __extenddftf2
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extenddftf2
; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: callq __multf3
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmp __multf3 # TAILCALL
entry:
; Simplified instruction pattern from the output of llvm before r289042,
; for a boost function ...::insert<...>::traverse<...>().
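Only the final libcall becomes a tail call here: the two __extenddftf2 results are still consumed inside the function, so those calls stay as callq. A hypothetical C++ analogue (not the test's actual source):

__float128 extend_and_multiply(double x, double y) {
  __float128 a = x;   // callq __extenddftf2 -- result is used below
  __float128 b = y;   // callq __extenddftf2 -- result is used below
  return a * b;       // tail position -> jmp __multf3 # TAILCALL
}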
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/X86/fp128-g.ll
@@ -54,9 +54,7 @@ entry:
; X64: movaps (%rdi), %xmm0
; X64: .loc
; X64: movaps %xmm0, %xmm1
-; X64: callq __addtf3
-; X64: .loc
-; X64: retq
+; X64: jmp __addtf3
}

; Function Attrs: norecurse nounwind readonly uwtable
@@ -102,9 +100,7 @@ entry:
; X64: movaps (%rax), %xmm0
; X64: .loc
; X64: movaps %xmm0, %xmm1
-; X64: callq __addtf3
-; X64: .loc
-; X64: retq
+; X64: jmp __addtf3
}

; Function Attrs: nounwind readnone
60 changes: 24 additions & 36 deletions llvm/test/CodeGen/X86/fp128-i128.ll
@@ -309,29 +309,23 @@ if.end: ; preds = %if.then, %entry
define fp128 @TestI128_4(fp128 %x) #0 {
; SSE-LABEL: TestI128_4:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: subq $40, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movq $0, (%rsp)
-; SSE-NEXT: movaps (%rsp), %xmm0
-; SSE-NEXT: callq __addtf3
-; SSE-NEXT: addq $40, %rsp
-; SSE-NEXT: retq
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: jmp __addtf3 # TAILCALL
;
; AVX-LABEL: TestI128_4:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: vmovaps %xmm0, %xmm1
-; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movq $0, (%rsp)
-; AVX-NEXT: vmovaps (%rsp), %xmm0
-; AVX-NEXT: callq __addtf3
-; AVX-NEXT: addq $40, %rsp
-; AVX-NEXT: retq
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT: jmp __addtf3 # TAILCALL
entry:
%0 = bitcast fp128 %x to i128
%bf.clear = and i128 %0, -18446744073709551616
@@ -370,29 +364,23 @@ entry:
define fp128 @acosl(fp128 %x) #0 {
; SSE-LABEL: acosl:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: subq $40, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movq $0, (%rsp)
-; SSE-NEXT: movaps (%rsp), %xmm0
-; SSE-NEXT: callq __addtf3
-; SSE-NEXT: addq $40, %rsp
-; SSE-NEXT: retq
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: jmp __addtf3 # TAILCALL
;
; AVX-LABEL: acosl:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: vmovaps %xmm0, %xmm1
-; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movq $0, (%rsp)
-; AVX-NEXT: vmovaps (%rsp), %xmm0
-; AVX-NEXT: callq __addtf3
-; AVX-NEXT: addq $40, %rsp
-; AVX-NEXT: retq
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT: jmp __addtf3 # TAILCALL
entry:
%0 = bitcast fp128 %x to i128
%bf.clear = and i128 %0, -18446744073709551616
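A second effect is visible in these hunks: with no call-and-return sequence left in the body, the subq/addq frame adjustment disappears and the 16-byte temporaries move to negative offsets off %rsp, i.e. into the x86-64 red zone. The IR under test clears the low 64 bits of the fp128 bit pattern and adds the result to the original value; a hypothetical C++ equivalent (clear_low_and_add is an invented name):

__float128 clear_low_and_add(__float128 x) {
  unsigned __int128 bits;
  __builtin_memcpy(&bits, &x, sizeof(bits));  // bitcast fp128 %x to i128
  bits &= ~(unsigned __int128)~0ull;          // and i128 %0, -18446744073709551616
  __float128 y;
  __builtin_memcpy(&y, &bits, sizeof(y));     // bitcast back to fp128
  return y + x;                               // jmp __addtf3 # TAILCALL
}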
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/X86/soft-fp.ll
@@ -53,5 +53,8 @@ entry:
; SOFT2-NOT: xmm{{[0-9]+}}
; SSE1: xmm{{[0-9]+}}
; SSE2: xmm{{[0-9]+}}
-; CHECK: ret{{[lq]}}
+; SOFT1: ret{{[lq]}}
+; SOFT2: ret{{[lq]}}
+; SSE1: jmp __addtf3
+; SSE2: jmp __addtf3
}
