79 changes: 61 additions & 18 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17,6 +17,7 @@

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
@@ -18873,6 +18874,8 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
// We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
+  // Additionally, if there are clamps ([us]min or [us]max) around
+  // the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
// If NoSignedZerosFPMath is enabled, this is a direct replacement.
// Otherwise, for strict math, we must handle edge cases:
// 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
@@ -18884,28 +18887,68 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
return SDValue();

-  // fptosi/fptoui round towards zero, so converting from FP to integer and
-  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
-  SDValue N0 = N->getOperand(0);
-  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
-      N0.getOperand(0).getValueType() == VT) {
-    if (DAG.getTarget().Options.NoSignedZerosFPMath)
-      return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
-  }
+  bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
+  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
+  assert(IsSigned || IsUnsigned);

-  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
-      N0.getOperand(0).getValueType() == VT) {
-    if (DAG.getTarget().Options.NoSignedZerosFPMath)
-      return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
+  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath;
+  // For signed conversions: The optimization changes signed zero behavior.
+  if (IsSigned && !IsSignedZeroSafe)
+    return SDValue();
+  // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
+  // (unless NoSignedZerosFPMath is set).
+  if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
+    return SDValue();

-  // Strict math: use FABS to handle negative inputs correctly.
-  if (TLI.isFAbsFree(VT)) {
-    SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
-    return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
-  }
+  // Collect potential clamp operations (outermost to innermost) and peel.
+  struct ClampOp {
+    unsigned Opcode;
+    SDValue Constant;
+  };
+  SmallVector<ClampOp, 2> Clamps;
+  unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
+  unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
+  SDValue IntVal = N->getOperand(0);
+  constexpr unsigned MaxClampLevels = 2;
+  for (unsigned Level = 0; Level < MaxClampLevels; ++Level) {
+    if (!IntVal.hasOneUse() ||
+        (IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
+      break;
+    unsigned FPClampOp =
+        (IntVal.getOpcode() == MinOp) ? ISD::FMINNUM : ISD::FMAXNUM;
+    if (!TLI.isOperationLegal(FPClampOp, VT))
+      return SDValue();
+    auto *IntConstNode = dyn_cast<ConstantSDNode>(IntVal.getOperand(1));
+    if (!IntConstNode)
+      return SDValue();
+    APFloat FPConst(VT.getFltSemantics());
+    APInt IntConst = IntConstNode->getAPIntValue();
+    FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);
+    // Verify roundtrip exactness.
+    APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
+    bool IsExact;
+    if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
+            APFloat::opOK ||
+        !IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
+      return SDValue();
+    Clamps.push_back({FPClampOp, DAG.getConstantFP(FPConst, DL, VT)});
+    IntVal = IntVal.getOperand(0);
+  }

-  return SDValue();
+  // Check that the sequence ends with a fpto[us]i of the right type.
+  unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
+  if (IntVal.getOpcode() != FPToIntOp ||
+      IntVal.getOperand(0).getValueType() != VT)
+    return SDValue();
+
+  SDValue Result = IntVal.getOperand(0);
+  if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
+    Result = DAG.getNode(ISD::FABS, DL, VT, Result);
+  Result = DAG.getNode(ISD::FTRUNC, DL, VT, Result);
+  // Apply clamps, if any, in reverse order (innermost first).
+  for (auto I = Clamps.rbegin(), E = Clamps.rend(); I != E; ++I)
+    Result = DAG.getNode(I->Opcode, DL, VT, Result, I->Constant);
+  return Result;
}

SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
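
An aside on the roundtrip check in the hunk above: a clamp constant is only folded into an FP constant if it converts to the FP type and back without changing, which is what rejects 16777217 for f32 in the test file below. The following standalone C++ sketch is not part of the patch; the helper name roundTripsExactly is illustrative, and it assumes a build that links against LLVM's Support library. It mirrors that check with APFloat and APSInt:

// Standalone sketch (not part of the patch): the roundtrip-exactness test the
// combine applies to each clamp constant before folding it into an FP constant.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include <cstdio>

using namespace llvm;

// Returns true if IntConst converts to the FP type and back without changing.
static bool roundTripsExactly(const APInt &IntConst, bool IsSigned,
                              const fltSemantics &Sem) {
  APFloat FPConst(Sem);
  FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);

  APSInt RoundTrip(IntConst.getBitWidth(), /*isUnsigned=*/!IsSigned);
  bool IsExact = false;
  if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
      APFloat::opOK)
    return false;
  return IsExact && static_cast<const APInt &>(RoundTrip) == IntConst;
}

int main() {
  const fltSemantics &F32 = APFloat::IEEEsingle();
  // 1023 is exactly representable in f32; 16777217 (2^24 + 1) is not.
  printf("1023:     %s\n",
         roundTripsExactly(APInt(32, 1023), true, F32) ? "exact" : "inexact");
  printf("16777217: %s\n",
         roundTripsExactly(APInt(32, 16777217), true, F32) ? "exact" : "inexact");
  return 0;
}

Built against LLVM, this prints "exact" for 1023 and "inexact" for 16777217, matching why test_inexact_16777217 below is left unfolded.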
140 changes: 140 additions & 0 deletions llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -0,0 +1,140 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefix=NO-SIGNED-ZEROS

; Test folding of float->int->float roundtrips into float-only operations.
; The optimization converts patterns like:
; sitofp(fptosi(x)) -> ftrunc(x)
; sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C)
; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.

define float @test_signed_basic(float %x) {
; CHECK-LABEL: test_signed_basic:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs s0, s0
; CHECK-NEXT: scvtf s0, s0
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi float %x to i32
%f = sitofp i32 %i to float
ret float %f
}

define float @test_unsigned_basic(float %x) {
; CHECK-LABEL: test_unsigned_basic:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu s0, s0
; CHECK-NEXT: ucvtf s0, s0
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui float %x to i32
%f = uitofp i32 %i to float
ret float %f
}

define float @test_signed_min_max(float %x) {
; CHECK-LABEL: test_signed_min_max:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs w9, s0
; CHECK-NEXT: mov w8, #-512 // =0xfffffe00
; CHECK-NEXT: cmn w9, #512
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: mov w9, #1023 // =0x3ff
; CHECK-NEXT: cmp w8, #1023
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi float %x to i32
%lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
%clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
%f = sitofp i32 %clamped to float
ret float %f
}

define float @test_unsigned_min_max(float %x) {
; CHECK-LABEL: test_unsigned_min_max:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: mov w8, #512 // =0x200
; CHECK-NEXT: cmp w9, #512
; CHECK-NEXT: csel w8, w9, w8, hi
; CHECK-NEXT: mov w9, #1023 // =0x3ff
; CHECK-NEXT: cmp w8, #1023
; CHECK-NEXT: csel w8, w8, w9, lo
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui float %x to i32
%lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
%clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
%f = uitofp i32 %clamped to float
ret float %f
}

; 16777217 is NOT exactly representable in f32.
define float @test_inexact_16777217(float %x) {
; CHECK-LABEL: test_inexact_16777217:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs w8, s0
; CHECK-NEXT: mov w9, #16777216 // =0x1000000
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: movk w9, #256, lsl #16
; CHECK-NEXT: csel w8, w8, w9, le
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_inexact_16777217:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: fcvtzs w8, s0
; NO-SIGNED-ZEROS-NEXT: mov w9, #16777216 // =0x1000000
; NO-SIGNED-ZEROS-NEXT: cmp w8, w9
; NO-SIGNED-ZEROS-NEXT: mov w9, #1 // =0x1
; NO-SIGNED-ZEROS-NEXT: movk w9, #256, lsl #16
; NO-SIGNED-ZEROS-NEXT: csel w8, w8, w9, le
; NO-SIGNED-ZEROS-NEXT: scvtf s0, w8
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi float %x to i32
%clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217)
%f = sitofp i32 %clamped to float
ret float %f
}

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
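
As a sanity check of the pattern described at the top of this test file, the integer clamp and the FP clamp compute the same value for finite inputs in i32 range. The sketch below is not part of the patch; viaInteger/viaFloat are illustrative names, and libm trunc/fmax/fmin stand in for ISD::FTRUNC/FMAXNUM/FMINNUM. It walks the bounds used by test_signed_min_max:

// Standalone sketch (not part of the patch): for finite inputs in i32 range,
// sitofp(smin(smax(fptosi x, -512), 1023)) and
// fminnum(fmaxnum(ftrunc x, -512.0), 1023.0) agree.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

static float viaInteger(float X) {
  int32_t I = static_cast<int32_t>(X); // fptosi: rounds toward zero
  I = std::max<int32_t>(I, -512);      // smax i32 -512
  I = std::min<int32_t>(I, 1023);      // smin i32 1023
  return static_cast<float>(I);        // sitofp
}

static float viaFloat(float X) {
  float T = std::trunc(X);             // ftrunc
  T = std::fmax(T, -512.0f);           // fmaxnum
  return std::fmin(T, 1023.0f);        // fminnum
}

int main() {
  // The -0.25f sample illustrates the signed-zero caveat: the integer path
  // yields +0.0 while the FP path yields -0.0. They compare equal under ==,
  // but differ in sign, which is why the signed fold is gated on
  // NoSignedZerosFPMath.
  const float Samples[] = {5.7f, -5.7f, -1000.25f, 2048.0f, 0.5f, -0.25f};
  for (float X : Samples)
    assert(viaInteger(X) == viaFloat(X));
  return 0;
}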