Skip to content

Commit

Permalink
[RISCV] Custom lower FP_TO_FP16 and FP16_TO_FP to correct ABI of l…
Browse files Browse the repository at this point in the history
…ibcall

As introduced in D99148, RISC-V uses the softPromoteHalf legalisation
for fp16 values without zfh, with logic ensuring that f16 values are
passed in lower bits of FPRs (see D98670) when F or D support is
present. This legalisation produces ISD::FP_TO_FP16 and ISD::FP16_TO_FP
nodes which (as described in ISDOpcodes.h) provide a "semi-softened
interface for dealing with f16 (as an i16)". i.e. the return type of the
FP_TO_FP16 is an integer rather than a float (and the arg of FP16_TO_FP
is an integer). The remainder of the description focuses primarily on
FP_TO_FP16 for ease of explanation.

FP_TO_FP16 is lowered to a libcall to `__truncsfhf2 (float)` or
`__truncdfhf2 (double)`. As of D92241, `_Float16` is used as the return
type of these libcalls if the host compiler accepts `_Float16` in a test
input (i.e. dst_t is set to `_Float16`). `_Float16` is enabled for the
RISC-V target as of D105001 and so the return value should be passed in
an FPR on hard float ABIs.

This patch fixes the ABI issue in what appears to be a minimally
invasive way - leaving the softPromoteHalf logic undisturbed, and
lowering FP_TO_FP16 to an f32-returning libcall, converting its result
to an XLen integer value.

As can be seen in the test changes, the custom lowering for FP16_TO_FP
means the libcall is no longer tail-callable.

Although this patch fixes the issue, there are two open items:
* Redundant fmv.x.w and fmv.w.x pairs are now sometimes produced during
  lowering (not a correctness issue).
* No coverage for STRICT variants of FP16 conversion opcodes.

Differential Revision: https://reviews.llvm.org/D151284
  • Loading branch information
asb committed Jun 30, 2023
1 parent ee5aaa8 commit 5ba40c7
Show file tree
Hide file tree
Showing 9 changed files with 421 additions and 301 deletions.
35 changes: 34 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -352,7 +352,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

static const unsigned FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
ISD::FREM};

static const unsigned FPRndMode[] = {
ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
Expand Down Expand Up @@ -430,6 +430,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

if (Subtarget.hasStdExtZfa())
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
Expand Down Expand Up @@ -467,6 +469,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
}

if (Subtarget.is64Bit()) {
Expand Down Expand Up @@ -4960,6 +4964,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
return Res;
}
case ISD::FP_TO_FP16: {
// Custom lower to ensure the libcall return is passed in an FPR on hard
// float ABIs.
assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
RTLIB::Libcall LC =
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
if (Subtarget.is64Bit())
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}
case ISD::FP16_TO_FP: {
// Custom lower to ensure the libcall argument is passed in an FPR on hard
// float ABIs.
assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
SDValue Arg = Subtarget.is64Bit()
? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
Op.getOperand(0))
: DAG.getBitcast(MVT::f32, Op.getOperand(0));
SDValue Res =
makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
.first;
return Res;
}
case ISD::FTRUNC:
case ISD::FCEIL:
case ISD::FFLOOR:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/RISCV/calling-conv-half.ll
Expand Up @@ -83,7 +83,6 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: mv s0, a0
; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2@plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: add a0, s0, a0
Expand All @@ -99,6 +98,7 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: mv s0, a0
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2@plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: addw a0, s0, a0
Expand Down Expand Up @@ -292,7 +292,6 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: mv s0, a7
; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2@plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: add a0, s0, a0
Expand All @@ -308,6 +307,7 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: mv s0, a7
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2@plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: addw a0, s0, a0
Expand Down Expand Up @@ -602,7 +602,6 @@ define i32 @caller_half_ret() nounwind {
; RV32-ILP32F-NEXT: addi sp, sp, -16
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: call callee_half_ret@plt
; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2@plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
Expand All @@ -615,6 +614,7 @@ define i32 @caller_half_ret() nounwind {
; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: call callee_half_ret@plt
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2@plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/CodeGen/RISCV/copysign-casts.ll
Expand Up @@ -163,7 +163,7 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fmv.x.w a0, fa1
; RV32IFD-NEXT: fmv.s fa0, fa1
; RV32IFD-NEXT: call __extendhfsf2@plt
; RV32IFD-NEXT: fcvt.d.s fa5, fa0
; RV32IFD-NEXT: fsgnj.d fa0, fs0, fa5
Expand All @@ -179,6 +179,7 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: fmv.d fs0, fa0
; RV64IFD-NEXT: fmv.x.w a0, fa1
; RV64IFD-NEXT: fmv.w.x fa0, a0
; RV64IFD-NEXT: call __extendhfsf2@plt
; RV64IFD-NEXT: fcvt.d.s fa5, fa0
; RV64IFD-NEXT: fsgnj.d fa0, fs0, fa5
Expand Down Expand Up @@ -264,7 +265,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.x.w a0, fa1
; RV32IF-NEXT: fmv.s fa0, fa1
; RV32IF-NEXT: call __extendhfsf2@plt
; RV32IF-NEXT: fsgnj.s fa0, fs0, fa0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
Expand All @@ -278,7 +279,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa0
; RV32IFD-NEXT: fmv.x.w a0, fa1
; RV32IFD-NEXT: fmv.s fa0, fa1
; RV32IFD-NEXT: call __extendhfsf2@plt
; RV32IFD-NEXT: fsgnj.s fa0, fs0, fa0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
Expand All @@ -293,6 +294,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: fmv.s fs0, fa0
; RV64IFD-NEXT: fmv.x.w a0, fa1
; RV64IFD-NEXT: fmv.w.x fa0, a0
; RV64IFD-NEXT: call __extendhfsf2@plt
; RV64IFD-NEXT: fsgnj.s fa0, fs0, fa0
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
Expand Down
53 changes: 34 additions & 19 deletions llvm/test/CodeGen/RISCV/fp16-promote.ll
Expand Up @@ -15,8 +15,14 @@ define void @test_load_store(ptr %p, ptr %q) nounwind {
define float @test_fpextend_float(ptr %p) nounwind {
; CHECK-LABEL: test_fpextend_float:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu a0, 0(a0)
; CHECK-NEXT: tail __extendhfsf2@plt
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%a = load half, ptr %p
%r = fpext half %a to float
ret float %r
Expand All @@ -28,6 +34,7 @@ define double @test_fpextend_double(ptr %p) nounwind {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu a0, 0(a0)
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fcvt.d.s fa0, fa0
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
Expand All @@ -46,6 +53,7 @@ define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call __truncsfhf2@plt
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
Expand All @@ -64,6 +72,7 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call __truncdfhf2@plt
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
Expand All @@ -80,22 +89,25 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: lhu s1, 0(a0)
; CHECK-NEXT: lhu a0, 0(a1)
; CHECK-NEXT: lhu a0, 0(a0)
; CHECK-NEXT: lhu a1, 0(a1)
; CHECK-NEXT: fmv.w.x fs0, a0
; CHECK-NEXT: fmv.w.x fa0, a1
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fmv.s fs0, fa0
; CHECK-NEXT: mv a0, s1
; CHECK-NEXT: fmv.s fs1, fa0
; CHECK-NEXT: fmv.s fa0, fs0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fadd.s fa0, fa0, fs0
; CHECK-NEXT: fadd.s fa0, fa0, fs1
; CHECK-NEXT: call __truncsfhf2@plt
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%a = load half, ptr %p
Expand All @@ -111,22 +123,25 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: lhu s1, 0(a0)
; CHECK-NEXT: lhu a0, 0(a1)
; CHECK-NEXT: lhu a0, 0(a0)
; CHECK-NEXT: lhu a1, 0(a1)
; CHECK-NEXT: fmv.w.x fs0, a0
; CHECK-NEXT: fmv.w.x fa0, a1
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fmv.s fs0, fa0
; CHECK-NEXT: mv a0, s1
; CHECK-NEXT: fmv.s fs1, fa0
; CHECK-NEXT: fmv.s fa0, fs0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fmul.s fa0, fa0, fs0
; CHECK-NEXT: fmul.s fa0, fa0, fs1
; CHECK-NEXT: call __truncsfhf2@plt
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%a = load half, ptr %p
Expand Down

0 comments on commit 5ba40c7

Please sign in to comment.