9 changes: 9 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
def int_lrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
def int_llrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;

// TODO: int operand should be constrained to same number of elements as the result.
def int_ldexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
llvm_anyint_ty]>;
}

def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
Expand Down Expand Up @@ -1168,6 +1172,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
llvm_i32_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_ldexp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/RuntimeLibcalls.def
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ HANDLE_LIBCALL(LLRINT_F64, "llrint")
HANDLE_LIBCALL(LLRINT_F80, "llrintl")
HANDLE_LIBCALL(LLRINT_F128, "llrintl")
HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl")
HANDLE_LIBCALL(LDEXP_F32, "ldexpf")
HANDLE_LIBCALL(LDEXP_F64, "ldexp")
HANDLE_LIBCALL(LDEXP_F80, "ldexpl")
HANDLE_LIBCALL(LDEXP_F128, "ldexpl")
HANDLE_LIBCALL(LDEXP_PPCF128, "ldexpl")

// Floating point environment
HANDLE_LIBCALL(FEGETENV, "fegetenv")
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,9 @@ HANDLE_TARGET_OPCODE(G_FLOG2)
/// Floating point base-10 logarithm of a value.
HANDLE_TARGET_OPCODE(G_FLOG10)

/// Floating point x * 2^n
HANDLE_TARGET_OPCODE(G_FLDEXP)

/// Generic FP negation.
HANDLE_TARGET_OPCODE(G_FNEG)

Expand Down Expand Up @@ -762,6 +765,7 @@ HANDLE_TARGET_OPCODE(G_STRICT_FDIV)
HANDLE_TARGET_OPCODE(G_STRICT_FREM)
HANDLE_TARGET_OPCODE(G_STRICT_FMA)
HANDLE_TARGET_OPCODE(G_STRICT_FSQRT)
HANDLE_TARGET_OPCODE(G_STRICT_FLDEXP)

/// read_register intrinsic
HANDLE_TARGET_OPCODE(G_READ_REGISTER)
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,13 @@ def G_FLOG10 : GenericInstruction {
let hasSideEffects = false;
}

// Floating point x * 2^n
def G_FLDEXP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type1:$src1);
let hasSideEffects = false;
}

// Floating point ceiling of a value.
def G_FCEIL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
Expand Down Expand Up @@ -1384,6 +1391,7 @@ def G_STRICT_FDIV : ConstrainedInstruction<G_FDIV>;
def G_STRICT_FREM : ConstrainedInstruction<G_FREM>;
def G_STRICT_FMA : ConstrainedInstruction<G_FMA>;
def G_STRICT_FSQRT : ConstrainedInstruction<G_FSQRT>;
def G_STRICT_FLDEXP : ConstrainedInstruction<G_FLDEXP>;

//------------------------------------------------------------------------------
// Memory intrinsics
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def : GINodeEquiv<G_FREM, frem>;
def : GINodeEquiv<G_FPOW, fpow>;
def : GINodeEquiv<G_FEXP2, fexp2>;
def : GINodeEquiv<G_FLOG2, flog2>;
def : GINodeEquiv<G_FLDEXP, fldexp>;
def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
def : GINodeEquiv<G_IS_FPCLASS, is_fpclass>;
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
Expand Down Expand Up @@ -158,6 +159,7 @@ def : GINodeEquiv<G_STRICT_FDIV, strict_fdiv>;
def : GINodeEquiv<G_STRICT_FREM, strict_frem>;
def : GINodeEquiv<G_STRICT_FMA, strict_fma>;
def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>;
def : GINodeEquiv<G_STRICT_FLDEXP, strict_fldexp>;

// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int
def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat
SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<2, OtherVT>
]>;
def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp
SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>
]>;
def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
SDTCisVTSmallerThanOp<2, 1>
Expand Down Expand Up @@ -499,6 +502,7 @@ def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>;
def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>;
def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>;
def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>;
def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>;
def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
Expand Down Expand Up @@ -549,6 +553,8 @@ def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
SDTFPBinOp, [SDNPHasChain]>;
def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP",
SDTFPExpOp, [SDNPHasChain]>;
def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_frint : SDNode<"ISD::STRICT_FRINT",
Expand Down Expand Up @@ -1449,6 +1455,9 @@ def any_fexp2 : PatFrags<(ops node:$src),
def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fpow node:$lhs, node:$rhs),
(fpow node:$lhs, node:$rhs)]>;
def any_fldexp : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fldexp node:$lhs, node:$rhs),
(fldexp node:$lhs, node:$rhs)]>;
def any_flog2 : PatFrags<(ops node:$src),
[(strict_flog2 node:$src),
(flog2 node:$src)]>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1759,6 +1759,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FLOG2;
case Intrinsic::log10:
return TargetOpcode::G_FLOG10;
case Intrinsic::ldexp:
return TargetOpcode::G_FLDEXP;
case Intrinsic::nearbyint:
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
Expand Down Expand Up @@ -1851,6 +1853,8 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_STRICT_FMA;
case Intrinsic::experimental_constrained_sqrt:
return TargetOpcode::G_STRICT_FSQRT;
case Intrinsic::experimental_constrained_ldexp:
return TargetOpcode::G_STRICT_FLDEXP;
default:
return 0;
}
Expand Down
65 changes: 57 additions & 8 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
case TargetOpcode::G_FLDEXP:
RTLIBCASE(LDEXP_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
Expand Down Expand Up @@ -826,6 +828,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
Expand Down Expand Up @@ -1413,6 +1416,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_STRICT_FLDEXP:
return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
}
}

Expand Down Expand Up @@ -2553,14 +2559,30 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPOWI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPOWI:
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_STRICT_FLDEXP: {
if (TypeIdx == 0) {
if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
return UnableToLegalize;

Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
}

if (TypeIdx == 1) {
// For some reason SelectionDAG tries to promote to a libcall without
// actually changing the integer type for promotion.
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}

return UnableToLegalize;
}
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
Expand Down Expand Up @@ -4136,6 +4158,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
case G_FLDEXP:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
Expand Down Expand Up @@ -4211,6 +4234,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STRICT_FSUB:
case G_STRICT_FMUL:
case G_STRICT_FMA:
case G_STRICT_FLDEXP:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
Expand Down Expand Up @@ -5592,6 +5616,31 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 1)
return UnableToLegalize;

MachineIRBuilder &B = MIRBuilder;
Register ExpReg = MI.getOperand(2).getReg();
LLT ExpTy = MRI.getType(ExpReg);

unsigned ClampSize = NarrowTy.getScalarSizeInBits();

// Clamp the exponent to the range of the target type.
auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);

auto Trunc = B.buildTrunc(NarrowTy, Clamp);
Observer.changingInstr(MI);
MI.getOperand(2).setReg(Trunc.getReg(0));
Observer.changedInstr(MI);
return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
Expand Down
137 changes: 137 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ class SelectionDAGLegalize {
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandFNEG(SDNode *Node) const;
SDValue expandLdexp(SDNode *Node) const;

SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
Expand Down Expand Up @@ -2313,6 +2315,118 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
}

SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue X = Node->getOperand(0);
SDValue N = Node->getOperand(1);
EVT ExpVT = N.getValueType();
EVT AsIntVT = VT.changeTypeToInteger();
if (AsIntVT == EVT()) // TODO: How to handle f80?
return SDValue();

if (Node->getOpcode() == ISD::STRICT_FLDEXP) // TODO
return SDValue();

SDNodeFlags NSW;
NSW.setNoSignedWrap(true);
SDNodeFlags NUW_NSW;
NUW_NSW.setNoUnsignedWrap(true);
NUW_NSW.setNoSignedWrap(true);

EVT SetCCVT =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ExpVT);
const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);

const APFloat::ExponentType MaxExpVal = APFloat::semanticsMaxExponent(FltSem);
const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
const int Precision = APFloat::semanticsPrecision(FltSem);

const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT);
const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);

const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT);

const APFloat One(FltSem, "1.0");
APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven);

// Offset by precision to avoid denormal range.
APFloat ScaleDownK =
scalbn(One, MinExpVal + Precision, APFloat::rmNearestTiesToEven);

// TODO: Should really introduce control flow and use a block for the >
// MaxExp, < MinExp cases

// First, handle exponents Exp > MaxExp and scale down.
SDValue NGtMaxExp = DAG.getSetCC(dl, SetCCVT, N, MaxExp, ISD::SETGT);

SDValue DecN0 = DAG.getNode(ISD::SUB, dl, ExpVT, N, MaxExp, NSW);
SDValue ClampMaxVal = DAG.getConstant(3 * MaxExpVal, dl, ExpVT);
SDValue ClampN_Big = DAG.getNode(ISD::SMIN, dl, ExpVT, N, ClampMaxVal);
SDValue DecN1 =
DAG.getNode(ISD::SUB, dl, ExpVT, ClampN_Big, DoubleMaxExp, NSW);

SDValue ScaleUpTwice =
DAG.getSetCC(dl, SetCCVT, N, DoubleMaxExp, ISD::SETUGT);

const SDValue ScaleUpVal = DAG.getConstantFP(ScaleUpK, dl, VT);
SDValue ScaleUp0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleUpVal);
SDValue ScaleUp1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleUp0, ScaleUpVal);

SDValue SelectN_Big =
DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleUpTwice, DecN1, DecN0);
SDValue SelectX_Big =
DAG.getNode(ISD::SELECT, dl, VT, ScaleUpTwice, ScaleUp1, ScaleUp0);

// Now handle exponents Exp < MinExp
SDValue NLtMinExp = DAG.getSetCC(dl, SetCCVT, N, MinExp, ISD::SETLT);

SDValue Increment0 = DAG.getConstant(-(MinExpVal + Precision), dl, ExpVT);
SDValue Increment1 = DAG.getConstant(-2 * (MinExpVal + Precision), dl, ExpVT);

SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW);

SDValue ClampMinVal =
DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal);
SDValue IncN1 =
DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW);

const SDValue ScaleDownVal = DAG.getConstantFP(ScaleDownK, dl, VT);
SDValue ScaleDown0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleDownVal);
SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal);

SDValue ScaleDownTwice = DAG.getSetCC(
dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT),
ISD::SETULT);

SDValue SelectN_Small =
DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0);
SDValue SelectX_Small =
DAG.getNode(ISD::SELECT, dl, VT, ScaleDownTwice, ScaleDown1, ScaleDown0);

// Now combine the two out of range exponent handling cases with the base
// case.
SDValue NewX = DAG.getNode(
ISD::SELECT, dl, VT, NGtMaxExp, SelectX_Big,
DAG.getNode(ISD::SELECT, dl, VT, NLtMinExp, SelectX_Small, X));

SDValue NewN = DAG.getNode(
ISD::SELECT, dl, ExpVT, NGtMaxExp, SelectN_Big,
DAG.getNode(ISD::SELECT, dl, ExpVT, NLtMinExp, SelectN_Small, N));

SDValue BiasedN = DAG.getNode(ISD::ADD, dl, ExpVT, NewN, MaxExp, NSW);

SDValue ExponentShiftAmt =
DAG.getShiftAmountConstant(Precision - 1, ExpVT, dl);
SDValue CastExpToValTy = DAG.getZExtOrTrunc(BiasedN, dl, AsIntVT);

SDValue AsInt = DAG.getNode(ISD::SHL, dl, AsIntVT, CastExpToValTy,
ExponentShiftAmt, NUW_NSW);
SDValue AsFP = DAG.getNode(ISD::BITCAST, dl, VT, AsInt);
return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP);
}

/// This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
Expand Down Expand Up @@ -3250,6 +3364,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: {
EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = RTLIB::getLDEXP(VT);
// Use the LibCall instead, it is very likely faster
// FIXME: Use separate LibCall action.
if (TLI.getLibcallName(LC))
break;

if (SDValue Expanded = expandLdexp(Node)) {
Results.push_back(Expanded);
if (Node->getOpcode() == ISD::STRICT_FLDEXP)
Results.push_back(Expanded.getValue(1));
}

break;
}
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");

Expand Down Expand Up @@ -4142,6 +4273,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUNDEVEN_F128,
RTLIB::ROUNDEVEN_PPCF128, Results);
break;
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP:
ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80,
RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results);
break;
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
Expand Down Expand Up @@ -4871,6 +5007,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp4.getValue(1));
break;
case ISD::FCOPYSIGN:
case ISD::FLDEXP:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
Expand Down
29 changes: 22 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FPOW:
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FPOWI:
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
Expand Down Expand Up @@ -603,13 +605,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_PPCF128));
}

SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
unsigned Offset = IsStrict ? 1 : 0;
assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
"Unsupported power type!");
RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
bool IsPowI =
N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;

RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
: RTLIB::getLDEXP(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
Expand Down Expand Up @@ -1274,6 +1280,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: ExpandFloatRes_FLDEXP(N, Lo, Hi); break;
case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
Expand Down Expand Up @@ -1569,6 +1577,11 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi);
}

void DAGTypeLegalizer::ExpandFloatRes_FLDEXP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
ExpandFloatRes_Binary(N, RTLIB::getLDEXP(N->getValueType(0)), Lo, Hi);
}

void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
Expand Down Expand Up @@ -2310,7 +2323,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;

case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
case ISD::FPOWI:
case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;

case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
Expand Down Expand Up @@ -2479,7 +2493,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
}

// Promote the Float (first) operand and retain the Integer (second) operand
SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteFloatRes_ExpOp(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
Expand Down Expand Up @@ -2676,7 +2690,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;

case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break;
case ISD::FPOWI:
case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;

case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
Expand Down Expand Up @@ -2788,7 +2803,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}

SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
Expand Down
31 changes: 17 additions & 14 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,10 +1714,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;

case ISD::FPOWI:
case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break;

case ISD::STRICT_FPOWI:
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: Res = PromoteIntOp_ExpOp(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
Expand Down Expand Up @@ -2201,26 +2201,29 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
0);
}

SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

// The integer operand is the last operand in FPOWI (so the result and
// floating point operand is already type legalized).
bool IsPowI =
N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;

// The integer operand is the last operand in FPOWI (or FLDEXP) (so the result
// and floating point operand is already type legalized).
RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
: RTLIB::getLDEXP(N->getValueType(0));

if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
SDValue Op = SExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
}

// We can't just promote the exponent type in FPOWI, since we want to lower
// the node to a libcall and we if we promote to a type larger than
// sizeof(int) the libcall might not be according to the targets ABI. Instead
// we rewrite to a libcall here directly, letting makeLibCall handle promotion
// if the target accepts it according to shouldSignExtendTypeInLibCall.
RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
// FIXME: Implement this if some target needs it.
DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
return DAG.getUNDEF(N->getValueType(0));
}

unsigned OpOffset = IsStrict ? 1 : 0;
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
Expand Down Expand Up @@ -563,7 +563,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_BF16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatRes_ExpOp(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
Expand Down Expand Up @@ -641,6 +641,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLDEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down Expand Up @@ -690,7 +691,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
SDValue PromoteFloatRes_FMAD(SDNode *N);
SDValue PromoteFloatRes_FPOWI(SDNode *N);
SDValue PromoteFloatRes_ExpOp(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
Expand Down Expand Up @@ -731,7 +732,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
SDValue SoftPromoteHalfRes_FPOWI(SDNode *N);
SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
Expand Down Expand Up @@ -784,7 +785,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
Expand Down Expand Up @@ -860,8 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
Expand Down Expand Up @@ -906,7 +906,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);

//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -982,7 +982,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);

Expand Down Expand Up @@ -1012,6 +1012,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
SDValue WidenVecOp_ExpOp(SDNode *N);

/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
case ISD::FLOG:
Expand Down
108 changes: 66 additions & 42 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
Expand Down Expand Up @@ -126,6 +126,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::FLDEXP:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
Expand Down Expand Up @@ -348,10 +349,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
N->getOperand(1));
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
N->getOperand(1));
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
Expand Down Expand Up @@ -960,8 +961,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::FPOWI:
case ISD::FLDEXP:
case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
Expand Down Expand Up @@ -1463,31 +1465,30 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}

void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}

void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// Handle splitting an FP where the second operand does not match the first
// type. The second operand may be a scalar, or a vector that has exactly as
// many elements as the first
void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc DL(N);

SDValue RHSLo, RHSHi;
SDValue RHS = N->getOperand(1);
EVT RHSVT = RHS.getValueType();
if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
GetSplitVector(RHS, RHSLo, RHSHi);
else
std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));

if (RHSVT.isVector()) {
if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
GetSplitVector(RHS, RHSLo, RHSHi);
else
std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));

Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
} else {
Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
}
}

void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
Expand Down Expand Up @@ -2846,7 +2847,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
Expand Down Expand Up @@ -2900,6 +2901,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FTRUNC:
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::FLDEXP:
Res = SplitVecOp_FPOpDifferentTypes(N);
break;

case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
Expand Down Expand Up @@ -3845,10 +3849,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}

SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.

// Split a vector type in an FP binary operation where the second operand has a
// different type from the first.
//
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.
SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
SDLoc DL(N);

EVT LHSLoVT, LHSHiVT;
Expand All @@ -3864,8 +3870,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
SDValue RHSLo, RHSHi;
std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);

SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo);
SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi);
SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);

return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
Expand Down Expand Up @@ -4075,8 +4081,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_IS_FPCLASS(N);
break;

case ISD::FLDEXP:
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
Res = WidenVecRes_ExpOp(N);
break;

case ISD::ANY_EXTEND_VECTOR_INREG:
Expand Down Expand Up @@ -4433,10 +4440,18 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 1; i < NumOpers; ++i) {
SDValue Oper = N->getOperand(i);

if (Oper.getValueType().isVector()) {
assert(Oper.getValueType() == N->getValueType(0) &&
"Invalid operand type to widen!");
Oper = GetWidenedVector(Oper);
EVT OpVT = Oper.getValueType();
if (OpVT.isVector()) {
if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
Oper = GetWidenedVector(Oper);
else {
EVT WideOpVT =
EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
WidenVT.getVectorElementCount());
Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
DAG.getUNDEF(WideOpVT), Oper,
DAG.getVectorIdxConstant(0, dl));
}
}

InOps.push_back(Oper);
Expand All @@ -4454,9 +4469,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];

if (Op.getValueType().isVector())
Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
EVT OpVT = Op.getValueType();
if (OpVT.isVector()) {
EVT OpExtractVT =
EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
VT.getVectorElementCount());
Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op,
DAG.getVectorIdxConstant(Idx, dl));
}

EOps.push_back(Op);
}
Expand All @@ -4480,8 +4500,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];

if (Op.getValueType().isVector())
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
EVT OpVT = Op.getValueType();
if (OpVT.isVector())
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OpVT.getVectorElementType(), Op,
DAG.getVectorIdxConstant(Idx, dl));

EOps.push_back(Op);
Expand Down Expand Up @@ -4790,11 +4812,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
N->getFlags());
}

SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
SDValue RHS = N->getOperand(1);
SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;

return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}

SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4977,7 +4977,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
case ISD::FNEARBYINT: {
case ISD::FNEARBYINT:
case ISD::FLDEXP: {
if (SNaN)
return true;
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6447,6 +6447,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::ldexp:
setValue(&I, DAG.getNode(ISD::FLDEXP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Expand Down Expand Up @@ -8635,6 +8641,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
case LibFunc_ldexp:
case LibFunc_ldexpf:
case LibFunc_ldexpl:
if (visitBinaryFloatCall(I, ISD::FLDEXP))
return;
break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";

case ISD::FLDEXP: return "fldexp";
case ISD::STRICT_FLDEXP: return "strict_fldexp";
case ISD::FPOWI: return "fpowi";
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
if (TT.isOSOpenBSD()) {
setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}

if (TT.isOSWindows() && !TT.isOSCygMing()) {
setLibcallName(RTLIB::LDEXP_F32, nullptr);
setLibcallName(RTLIB::LDEXP_F80, nullptr);
setLibcallName(RTLIB::LDEXP_F128, nullptr);
setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);
}
}

/// GetFPLibCall - Helper to return the right libcall for the given floating
Expand Down Expand Up @@ -498,6 +505,11 @@ RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
POWI_PPCF128);
}

RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
LDEXP_PPCF128);
}

RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
unsigned ModeN, ModelN;
Expand Down Expand Up @@ -845,7 +857,8 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);

// These library functions default to expand.
setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand);
setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP},
VT, Expand);

// These operations default to expand for vector types.
if (VT.isVector())
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case ISD::FFLOOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::CLAMP:
case AMDGPUISD::COS_HW:
Expand All @@ -179,7 +180,6 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::LDEXP:
// On gfx10, all 16-bit instructions preserve the high bits.
return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
case ISD::FP_ROUND:
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2613,7 +2613,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
ShAmt);
// On GCN, use LDEXP directly.
if (Subtarget->isGCN())
return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt);
return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);

// Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
// part directly to emulate the multiplication of 2^ShAmt. That 8-bit
Expand Down Expand Up @@ -2646,7 +2646,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,

SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);

SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
// TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
Expand Down Expand Up @@ -4637,7 +4637,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RCP_IFLAG)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(CARRY)
Expand Down Expand Up @@ -5044,7 +5043,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
// TODO: Need is known positive check.
return false;
}
case AMDGPUISD::LDEXP:
case ISD::FLDEXP:
case AMDGPUISD::FRACT: {
if (SNaN)
return true;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ enum NodeType : unsigned {
RCP_IFLAG,
FMUL_LEGACY,
RSQ_CLAMP,
LDEXP,
FP_CLASS,
DOT4,
CARRY,
Expand Down
10 changes: 0 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

def AMDGPULdExpOp : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

def AMDGPUFPClassOp : SDTypeProfile<1, 2,
[SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;
Expand Down Expand Up @@ -128,8 +124,6 @@ def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
Expand Down Expand Up @@ -389,10 +383,6 @@ def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src),
def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
(AMDGPUfract_impl node:$src)]>;

def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_ldexp node:$src0, node:$src1),
(AMDGPUldexp_impl node:$src0, node:$src1)]>;

def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_class node:$src0, node:$src1),
(AMDGPUfp_class_impl node:$src0, node:$src1)]>;
Expand Down
23 changes: 16 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({S32, S64, S16})
.scalarize(0)
.clampScalar(0, S16, S64);

getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
.legalFor({{S32, S32}, {S64, S32}, {S16, S16}})
.scalarize(0)
.maxScalarIf(typeIs(0, S16), 1, S16)
.clampScalar(1, S32, S32)
.lower();
} else {
getActionDefinitionsBuilder(G_FSQRT)
.legalFor({S32, S64})
Expand All @@ -929,6 +936,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.clampScalar(0, S32, S64);
}

getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
.legalFor({{S32, S32}, {S64, S32}})
.scalarize(0)
.clampScalar(0, S32, S64)
.clampScalar(1, S32, S32)
.lower();
}

getActionDefinitionsBuilder(G_FPTRUNC)
Expand Down Expand Up @@ -2373,9 +2387,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
: B.buildUITOFP(S64, Unmerge.getReg(1));

auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
.addUse(CvtHi.getReg(0))
.addUse(ThirtyTwo.getReg(0));
auto LdExp = B.buildFLdexp(S64, CvtHi, ThirtyTwo);

// TODO: Should this propagate fast-math-flags?
B.buildFAdd(Dst, LdExp, CvtLo);
Expand Down Expand Up @@ -2406,10 +2418,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
B.buildIntrinsic(Intrinsic::amdgcn_ldexp, ArrayRef<Register>{Dst},
/*HasSideEffects=*/false)
.addUse(FVal.getReg(0))
.addUse(Scale.getReg(0));
B.buildFLdexp(Dst, FVal, Scale);
MI.eraseFromParent();
return true;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3740,6 +3740,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPEXT:
case AMDGPU::G_FEXP2:
case AMDGPU::G_FLOG2:
case AMDGPU::G_FLDEXP:
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
Expand All @@ -3750,6 +3751,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_STRICT_FSUB:
case AMDGPU::G_STRICT_FMUL:
case AMDGPU::G_STRICT_FMA:
case AMDGPU::G_STRICT_FLDEXP:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
Expand Down Expand Up @@ -4213,7 +4215,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
case Intrinsic::amdgcn_fract:
Expand Down
50 changes: 44 additions & 6 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::f64, Custom);

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, {MVT::f32, MVT::f64},
Legal);

setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
Expand Down Expand Up @@ -528,7 +530,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

// F16 - VOP2 Actions.
setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);

setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, MVT::f16, Custom);
setOperationAction(ISD::FDIV, MVT::f16, Custom);

// F16 - VOP3 Actions.
Expand Down Expand Up @@ -4843,6 +4845,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP:
return lowerFLDEXP(Op, DAG);
case ISD::FMA:
return splitTernaryVectorOp(Op, DAG);
case ISD::FP_TO_SINT:
Expand Down Expand Up @@ -5464,6 +5469,40 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
return Op;
}

SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op.getOpcode() == ISD::STRICT_FLDEXP;
EVT VT = Op.getValueType();
assert(VT == MVT::f16);

SDValue Exp = Op.getOperand(IsStrict ? 2 : 1);
EVT ExpVT = Exp.getValueType();
if (ExpVT == MVT::i16)
return Op;

SDLoc DL(Op);

// Correct the exponent type for f16 to i16.
// Clamp the range of the exponent to the instruction's range.

// TODO: This should be a generic narrowing legalization, and can easily be
// for GlobalISel.

SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT);
SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp);

SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT);
SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp);

SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp);

if (IsStrict) {
return DAG.getNode(ISD::STRICT_FLDEXP, DL, {VT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1), TruncExp});
}

return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp);
}

SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
Expand Down Expand Up @@ -7151,8 +7190,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitRemovedIntrinsicError(DAG, DL, VT);
}
case Intrinsic::amdgcn_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2));

case Intrinsic::amdgcn_fract:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Expand Down Expand Up @@ -10379,6 +10417,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case ISD::FREM:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
case ISD::FLDEXP:
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMAD_FTZ:
case AMDGPUISD::RCP:
Expand All @@ -10390,7 +10429,6 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
case AMDGPUISD::FRACT:
case AMDGPUISD::LDEXP:
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
Expand Down Expand Up @@ -11976,12 +12014,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
case AMDGPUISD::RSQ_CLAMP: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;

Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ let IsNeverUniform = 1 in {
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
Expand Down Expand Up @@ -863,7 +863,7 @@ def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I32> {
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I16> {
// The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware
// encoding treats src1 as an f16
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
Expand All @@ -874,9 +874,9 @@ def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I32> {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16, any_fldexp>;
let SubtargetPredicate = HasTrue16BitInsts in
defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, AMDGPUldexp>;
defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, any_fldexp>;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdi

let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp>;
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, ISD::FLDEXP,
// Misc:
ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
// Vector:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FLDEXP, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FROUND, VT, Action);
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
setOperationAction(ISD::FLDEXP, VT, Action);
};

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
Expand Down Expand Up @@ -678,6 +679,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLDEXP, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,9 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FLDEXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
Expand Down Expand Up @@ -653,6 +656,9 @@
# DEBUG-NEXT: G_STRICT_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_STRICT_FLDEXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_READ_REGISTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
Expand Down
134 changes: 0 additions & 134 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir

This file was deleted.

This file was deleted.

56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[INT1]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
; GFX8-LABEL: name: test_sitofp_s64_to_s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
Expand All @@ -154,8 +154,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[INT1]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SITOFP %0
$vgpr0 = COPY %1
Expand All @@ -175,8 +175,8 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32)
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32)
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]]
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32)
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]]
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
; GFX8-LABEL: name: test_sitofp_s64_to_s64
; GFX8: liveins: $vgpr0_vgpr1
Expand All @@ -186,8 +186,8 @@ body: |
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32)
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32)
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]]
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[SITOFP]], [[C]](s32)
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SITOFP %0
Expand Down Expand Up @@ -476,8 +476,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[INT1]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
; GFX8-LABEL: name: test_sitofp_s33_to_s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
Expand All @@ -499,8 +499,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[INT1]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s33) = G_TRUNC %0
%2:_(s32) = G_SITOFP %1
Expand Down Expand Up @@ -533,8 +533,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX8-LABEL: name: test_sitofp_s64_to_s16
Expand All @@ -557,8 +557,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
Expand Down Expand Up @@ -594,23 +594,23 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]]
; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX6-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32)
; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
Expand Down Expand Up @@ -639,23 +639,23 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX8-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP]], [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]]
; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
; GFX8-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX8-NEXT: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX8-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[SITOFP1]], [[SUB3]](s32)
; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[INT]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
; GFX8-LABEL: name: test_uitofp_s64_to_s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
Expand All @@ -115,8 +115,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[INT]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_UITOFP %0
$vgpr0 = COPY %1
Expand All @@ -136,8 +136,8 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32)
; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32)
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]]
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32)
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]]
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
; GFX8-LABEL: name: test_uitofp_s64_to_s64
; GFX8: liveins: $vgpr0_vgpr1
Expand All @@ -147,8 +147,8 @@ body: |
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32)
; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32)
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]]
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s64) = G_FLDEXP [[UITOFP]], [[C]](s32)
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FLDEXP]], [[UITOFP1]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_UITOFP %0
Expand Down Expand Up @@ -444,8 +444,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[INT]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
; GFX8-LABEL: name: test_uitofp_s33_to_s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
Expand All @@ -463,8 +463,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[INT]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[FLDEXP]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s33) = G_TRUNC %0
%2:_(s32) = G_UITOFP %1
Expand Down Expand Up @@ -492,8 +492,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX8-LABEL: name: test_uitofp_s64_to_s16
Expand All @@ -511,8 +511,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
Expand Down Expand Up @@ -543,8 +543,8 @@ body: |
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
Expand All @@ -554,8 +554,8 @@ body: |
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP1]], [[SUB1]](s32)
; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
Expand All @@ -579,8 +579,8 @@ body: |
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP]], [[SUB]](s32)
; GFX8-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP]](s32)
; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
Expand All @@ -590,8 +590,8 @@ body: |
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[UITOFP1]], [[SUB1]](s32)
; GFX8-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLDEXP1]](s32)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
Expand Down
549 changes: 323 additions & 226 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Large diffs are not rendered by default.

57 changes: 38 additions & 19 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,23 @@ define amdgpu_kernel void @ldexp_f16(
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s14, s2
; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s12, s6
; VI-NEXT: s_mov_b32 s13, s7
; VI-NEXT: s_mov_b32 s14, s2
; VI-NEXT: s_mov_b32 s15, s3
; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0
; VI-NEXT: buffer_load_dword v1, off, s[8:11], 0
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ushort v1, off, s[12:15], 0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_movk_i32 s4, 0x8000
; VI-NEXT: v_mov_b32_e32 v2, 0x7fff
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: v_med3_i32 v0, v0, s4, v2
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_ldexp_f16_e32 v0, v0, v1
; VI-NEXT: v_ldexp_f16_e32 v0, v1, v0
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
Expand All @@ -35,19 +39,22 @@ define amdgpu_kernel void @ldexp_f16(
; GFX10-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
; GFX10-NEXT: s_mov_b32 s2, -1
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-NEXT: s_mov_b32 s14, s2
; GFX10-NEXT: s_mov_b32 s15, s3
; GFX10-NEXT: s_mov_b32 s10, s2
; GFX10-NEXT: s_mov_b32 s11, s3
; GFX10-NEXT: s_mov_b32 s14, s2
; GFX10-NEXT: s_mov_b32 s15, s3
; GFX10-NEXT: s_movk_i32 s0, 0x8000
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s12, s6
; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX10-NEXT: s_mov_b32 s13, s7
; GFX10-NEXT: buffer_load_ushort v0, off, s[12:15], 0
; GFX10-NEXT: buffer_load_dword v1, off, s[8:11], 0
; GFX10-NEXT: s_mov_b32 s0, s4
; GFX10-NEXT: s_mov_b32 s1, s5
; GFX10-NEXT: buffer_load_ushort v1, off, s[12:15], 0
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_med3_i32 v0, v0, s0, 0x7fff
; GFX10-NEXT: s_mov_b32 s0, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX10-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-NEXT: s_endpgm
;
Expand All @@ -58,19 +65,23 @@ define amdgpu_kernel void @ldexp_f16(
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_mov_b32 s10, -1
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_mov_b32 s2, s10
; GFX11-NEXT: s_mov_b32 s3, s11
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s12, s6
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_mov_b32 s13, s7
; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0
; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
; GFX11-NEXT: s_movk_i32 s0, 0x8000
; GFX11-NEXT: buffer_load_u16 v1, off, s[12:15], 0
; GFX11-NEXT: s_mov_b32 s8, s4
; GFX11-NEXT: s_mov_b32 s9, s5
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: v_med3_i32 v0, v0, s0, 0x7fff
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand All @@ -97,8 +108,11 @@ define amdgpu_kernel void @ldexp_f16_imm_a(
; VI-NEXT: s_mov_b32 s9, s3
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_movk_i32 s0, 0x8000
; VI-NEXT: v_mov_b32_e32 v1, 0x7fff
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_med3_i32 v0, v0, s0, v1
; VI-NEXT: v_ldexp_f16_e32 v0, 2.0, v0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
Expand All @@ -113,10 +127,12 @@ define amdgpu_kernel void @ldexp_f16_imm_a(
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s8, s2
; GFX10-NEXT: s_mov_b32 s9, s3
; GFX10-NEXT: s_mov_b32 s4, s0
; GFX10-NEXT: s_movk_i32 s2, 0x8000
; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX10-NEXT: s_mov_b32 s4, s0
; GFX10-NEXT: s_mov_b32 s5, s1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_med3_i32 v0, v0, s2, 0x7fff
; GFX10-NEXT: v_ldexp_f16_e32 v0, 2.0, v0
; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX10-NEXT: s_endpgm
Expand All @@ -131,10 +147,13 @@ define amdgpu_kernel void @ldexp_f16_imm_a(
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s8, s2
; GFX11-NEXT: s_mov_b32 s9, s3
; GFX11-NEXT: s_mov_b32 s4, s0
; GFX11-NEXT: s_movk_i32 s2, 0x8000
; GFX11-NEXT: buffer_load_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_mov_b32 s4, s0
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_i32 v0, v0, s2, 0x7fff
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f16_e32 v0, 2.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down
Loading