Skip to content

Commit

Permalink
[X86] Don't lower f16->f80 fpext to libcall on darwin.
Browse files Browse the repository at this point in the history
We don't provide __extendhfxf2, and only have the soft-float
__extendhfsf2 in compiler-rt.  This only changed recently with
655ba9c, so this patch reverts back to the previous behavior.

However, the f80->f16 fptrunc is not easily implementable without
the compiler-rt __truncxfhf2, but that has always been true, and
isn't an immediate regression.

Patch by Ahmed Bougacha.

rdar://102194995
  • Loading branch information
Davide Italiano committed Nov 22, 2022
1 parent c763160 commit 0c01133
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 1 deletion.
5 changes: 4 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -23154,7 +23154,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();

if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80))
// Let f16->f80 get lowered to a libcall, except for darwin, where we should
// lower it to an fp_extend via f32 (as only f16<>f32 libcalls are available)
if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 &&
!Subtarget.getTargetTriple().isOSDarwin()))
return SDValue();

if (SVT == MVT::f16) {
Expand Down
71 changes: 71 additions & 0 deletions llvm/test/CodeGen/X86/half-fp80-darwin.ll
@@ -0,0 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-macosx %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
; RUN: llc -mtriple=x86_64-apple-macosx -mattr=+f16c %s -o - | FileCheck %s --check-prefix=CHECK-F16C

; Extend a half loaded from %inptr to x86_fp80 and store it to %outptr.
; On darwin the f16->f80 extension is expected to go through f32 rather than
; through a direct f16->f80 libcall: the soft-float run expects a call to
; ___extendhfsf2 and the +f16c run expects vcvtph2ps, and in both cases the
; resulting f32 is spilled to the stack and widened with flds/fstpt.
define void @extendhfxf(ptr %outptr, ptr %inptr) nounwind {
; CHECK-SOFT-LABEL: extendhfxf:
; CHECK-SOFT: ## %bb.0:
; CHECK-SOFT-NEXT: pushq %rbx
; CHECK-SOFT-NEXT: subq $16, %rsp
; CHECK-SOFT-NEXT: movq %rdi, %rbx
; CHECK-SOFT-NEXT: movzwl (%rsi), %edi
; CHECK-SOFT-NEXT: callq ___extendhfsf2
; CHECK-SOFT-NEXT: movss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-SOFT-NEXT: flds {{[0-9]+}}(%rsp)
; CHECK-SOFT-NEXT: fstpt (%rbx)
; CHECK-SOFT-NEXT: addq $16, %rsp
; CHECK-SOFT-NEXT: popq %rbx
; CHECK-SOFT-NEXT: retq
;
; CHECK-F16C-LABEL: extendhfxf:
; CHECK-F16C: ## %bb.0:
; CHECK-F16C-NEXT: movzwl (%rsi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-F16C-NEXT: flds -{{[0-9]+}}(%rsp)
; CHECK-F16C-NEXT: fstpt (%rdi)
; CHECK-F16C-NEXT: retq
%in = load half, ptr %inptr
%fp80 = fpext half %in to x86_fp80
store x86_fp80 %fp80, ptr %outptr
ret void
}

; Truncate an x86_fp80 loaded from %inptr to half and store it to %outptr.
; Both run lines currently expect a call to ___truncxfhf2, with the half
; result read back from the low 16 bits of %xmm0.
;
; FIXME: We don't currently provide __truncxfhf2, but we can't lower this as
; successive fptruncs (the way fpext is lowered through f32) because of
; double rounding.
; We also don't currently soft-float this call the way we do for, e.g.,
; __truncsfhf2: the latter long predates the fp16 parameter passing ABI, so
; it can't change.
; If we ever add a __truncxfhf2, we're not bound by existing ABI.
define void @truncxfhf(ptr %outptr, ptr %inptr) nounwind {
; CHECK-SOFT-LABEL: truncxfhf:
; CHECK-SOFT: ## %bb.0:
; CHECK-SOFT-NEXT: pushq %rbx
; CHECK-SOFT-NEXT: subq $16, %rsp
; CHECK-SOFT-NEXT: movq %rdi, %rbx
; CHECK-SOFT-NEXT: fldt (%rsi)
; CHECK-SOFT-NEXT: fstpt (%rsp)
; CHECK-SOFT-NEXT: callq ___truncxfhf2
; CHECK-SOFT-NEXT: pextrw $0, %xmm0, %eax
; CHECK-SOFT-NEXT: movw %ax, (%rbx)
; CHECK-SOFT-NEXT: addq $16, %rsp
; CHECK-SOFT-NEXT: popq %rbx
; CHECK-SOFT-NEXT: retq
;
; CHECK-F16C-LABEL: truncxfhf:
; CHECK-F16C: ## %bb.0:
; CHECK-F16C-NEXT: pushq %rbx
; CHECK-F16C-NEXT: subq $16, %rsp
; CHECK-F16C-NEXT: movq %rdi, %rbx
; CHECK-F16C-NEXT: fldt (%rsi)
; CHECK-F16C-NEXT: fstpt (%rsp)
; CHECK-F16C-NEXT: callq ___truncxfhf2
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; CHECK-F16C-NEXT: addq $16, %rsp
; CHECK-F16C-NEXT: popq %rbx
; CHECK-F16C-NEXT: retq
%in = load x86_fp80, ptr %inptr
%half = fptrunc x86_fp80 %in to half
store half %half, ptr %outptr
ret void
}

0 comments on commit 0c01133

Please sign in to comment.