Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[X86] Don't lower f16->f80 fpext to libcall on darwin.
We don't provide __extendhfxf2, and only have the soft-float __extendhfsf2 in compiler-rt. This only changed recently with 655ba9c, so this patch reverts back to the previous behavior. However, the f80->f16 fptrunc is not easily implementable without the compiler-rt __truncxfhf2, but that has always been true, and isn't an immediate regression. Patch by Ahmed Bougacha. rdar://102194995
- Loading branch information
Davide Italiano
committed
Nov 22, 2022
1 parent
c763160
commit 0c01133
Showing
2 changed files
with
75 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=x86_64-apple-macosx %s -o - | FileCheck %s --check-prefix=CHECK-SOFT | ||
; RUN: llc -mtriple=x86_64-apple-macosx -mattr=+f16c %s -o - | FileCheck %s --check-prefix=CHECK-F16C | ||
|
||
; Test: fpext of a half (loaded from %inptr) to x86_fp80, stored to %outptr. | ||
; Expected lowering without f16c: call ___extendhfsf2 to obtain a float, spill | ||
; it to the stack, then widen via x87 flds/fstpt (no f16->f80 libcall). | ||
; Expected lowering with +f16c: vcvtph2ps produces the float inline, followed | ||
; by the same x87 flds/fstpt widening; no call is emitted. | ||
define void @extendhfxf(ptr %outptr, ptr %inptr) nounwind { | ||
; CHECK-SOFT-LABEL: extendhfxf: | ||
; CHECK-SOFT: ## %bb.0: | ||
; CHECK-SOFT-NEXT: pushq %rbx | ||
; CHECK-SOFT-NEXT: subq $16, %rsp | ||
; CHECK-SOFT-NEXT: movq %rdi, %rbx | ||
; CHECK-SOFT-NEXT: movzwl (%rsi), %edi | ||
; CHECK-SOFT-NEXT: callq ___extendhfsf2 | ||
; CHECK-SOFT-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) | ||
; CHECK-SOFT-NEXT: flds {{[0-9]+}}(%rsp) | ||
; CHECK-SOFT-NEXT: fstpt (%rbx) | ||
; CHECK-SOFT-NEXT: addq $16, %rsp | ||
; CHECK-SOFT-NEXT: popq %rbx | ||
; CHECK-SOFT-NEXT: retq | ||
; | ||
; CHECK-F16C-LABEL: extendhfxf: | ||
; CHECK-F16C: ## %bb.0: | ||
; CHECK-F16C-NEXT: movzwl (%rsi), %eax | ||
; CHECK-F16C-NEXT: vmovd %eax, %xmm0 | ||
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 | ||
; CHECK-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) | ||
; CHECK-F16C-NEXT: flds -{{[0-9]+}}(%rsp) | ||
; CHECK-F16C-NEXT: fstpt (%rdi) | ||
; CHECK-F16C-NEXT: retq | ||
; IR under test: a single half -> x86_fp80 extension between a load and a store. | ||
%in = load half, ptr %inptr | ||
%fp80 = fpext half %in to x86_fp80 | ||
store x86_fp80 %fp80, ptr %outptr | ||
ret void | ||
} |
|
||
; FIXME: We don't currently provide __truncxfhf2, but we can't lower this as | ||
; successive fptruncs (like we do fpext) because of double rounding. | ||
; We also don't currently soft-float this call, like we do e.g., __truncsfhf2: | ||
; the latter long predates the fp16 parameter passing ABI, so can't change. | ||
; If we ever add a __truncxfhf2, we're not bound by existing ABI. | ||
; Test: fptrunc of an x86_fp80 (loaded from %inptr) to half, stored to %outptr. | ||
; Both RUN configurations (with and without +f16c) expect a call to | ||
; ___truncxfhf2: the fp80 operand is written to (%rsp) with fldt/fstpt before | ||
; the call, and the half result is extracted from %xmm0 afterwards. | ||
define void @truncxfhf(ptr %outptr, ptr %inptr) nounwind { | ||
; CHECK-SOFT-LABEL: truncxfhf: | ||
; CHECK-SOFT: ## %bb.0: | ||
; CHECK-SOFT-NEXT: pushq %rbx | ||
; CHECK-SOFT-NEXT: subq $16, %rsp | ||
; CHECK-SOFT-NEXT: movq %rdi, %rbx | ||
; CHECK-SOFT-NEXT: fldt (%rsi) | ||
; CHECK-SOFT-NEXT: fstpt (%rsp) | ||
; CHECK-SOFT-NEXT: callq ___truncxfhf2 | ||
; CHECK-SOFT-NEXT: pextrw $0, %xmm0, %eax | ||
; CHECK-SOFT-NEXT: movw %ax, (%rbx) | ||
; CHECK-SOFT-NEXT: addq $16, %rsp | ||
; CHECK-SOFT-NEXT: popq %rbx | ||
; CHECK-SOFT-NEXT: retq | ||
; | ||
; CHECK-F16C-LABEL: truncxfhf: | ||
; CHECK-F16C: ## %bb.0: | ||
; CHECK-F16C-NEXT: pushq %rbx | ||
; CHECK-F16C-NEXT: subq $16, %rsp | ||
; CHECK-F16C-NEXT: movq %rdi, %rbx | ||
; CHECK-F16C-NEXT: fldt (%rsi) | ||
; CHECK-F16C-NEXT: fstpt (%rsp) | ||
; CHECK-F16C-NEXT: callq ___truncxfhf2 | ||
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rbx) | ||
; CHECK-F16C-NEXT: addq $16, %rsp | ||
; CHECK-F16C-NEXT: popq %rbx | ||
; CHECK-F16C-NEXT: retq | ||
; IR under test: a single x86_fp80 -> half truncation between a load and a store. | ||
%in = load x86_fp80, ptr %inptr | ||
%half = fptrunc x86_fp80 %in to half | ||
store half %half, ptr %outptr | ||
ret void | ||
} |