diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index a080964d54e829..d03b4c0f912a8d 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -957,6 +957,8 @@ The AMDGPU backend implements the following LLVM IR intrinsics. llvm.amdgcn.exp2 Provides direct access to v_exp_f32 and v_exp_f16 (on targets with half support). Performs exp2 function. + :ref:`llvm.frexp <int_frexp>` Implemented for half, float and double. + :ref:`llvm.log2 <int_log2>` Implemented for float and half (and vectors of float or half). Not implemented for double. Hardware provides 1ULP accuracy for float, and 0.51ULP for half. Float @@ -964,7 +966,9 @@ The AMDGPU backend implements the following LLVM IR intrinsics. inputs. Backend will optimize out denormal scaling if marked with the :ref:`afn <fastmath_afn>` flag. - :ref:`llvm.frexp <int_frexp>` Implemented for half, float and double. + :ref:`llvm.log <int_log>` Implemented for float and half (and vectors). + + :ref:`llvm.log10 <int_log10>` Implemented for float and half (and vectors). ========================================= ========================================================== diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 25a013254a4218..6d078bc8e129af 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14813,6 +14813,8 @@ is unspecified. If the argument is an infinity, returns an infinity with the same sign and an unspecified exponent. +.. _int_log: + '``llvm.log.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -14851,6 +14853,8 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +.. _int_log10: + '``llvm.log10.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index a8356001741f96..78246609bc740d 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -151,8 +151,9 @@ Changes to the AMDGPU Backend * Added llvm.amdgcn.exp2.f32 intrinsic. 
This provides direct access to v_exp_f32. -* llvm.log2.f32 is now lowered accurately. Use llvm.amdgcn.log.f32 to - access the old behavior. +* llvm.log2.f32, llvm.log10.f32, and llvm.log.f32 are now lowered + accurately. Use llvm.amdgcn.log.f32 to access the old behavior for + llvm.log2.f32. Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f3cceaa1d1f5ac..a6da98c953d8ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -350,6 +350,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG2, MVT::f16, Custom); } + setOperationAction({ISD::FLOG10, ISD::FLOG}, MVT::f16, Custom); + // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches // scalarization code. Can be removed when IS_FPCLASS expand isn't called by // default unless marked custom/legal. @@ -1309,9 +1311,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FLOG2: return LowerFLOG2(Op, DAG); case ISD::FLOG: - return LowerFLOG(Op, DAG, numbers::ln2); case ISD::FLOG10: - return LowerFLOG(Op, DAG, numbers::ln2 / numbers::ln10); + return LowerFLOGCommon(Op, DAG); case ISD::FEXP: return lowerFEXP(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); @@ -1346,6 +1347,11 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG)) Results.push_back(Lowered); return; + case ISD::FLOG: + case ISD::FLOG10: + if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG)) + Results.push_back(Lowered); + return; default: return; } @@ -2447,6 +2453,75 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) { llvm_unreachable("covered opcode switch"); } +static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, + SDNodeFlags Flags) { + return !Flags.hasApproximateFuncs() && + 
!DAG.getTarget().Options.UnsafeFPMath && + !DAG.getTarget().Options.ApproxFuncFPMath && + !valueIsKnownNeverF32Denorm(Src) && + DAG.getMachineFunction() + .getDenormalMode(APFloat::IEEEsingle()) + .Input != DenormalMode::PreserveSign; +} + +SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG, + SDValue Src, + SDNodeFlags Flags) const { + SDLoc SL(Src); + EVT VT = Src.getValueType(); + const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); + SDValue SmallestNormal = + DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT); + + // Want to scale denormals up, but negatives and 0 work just as well on the + // scaled path. + SDValue IsLtSmallestNormal = DAG.getSetCC( + SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, + SmallestNormal, ISD::SETOLT); + + return IsLtSmallestNormal; +} + +SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src, + SDNodeFlags Flags) const { + SDLoc SL(Src); + EVT VT = Src.getValueType(); + const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); + SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT); + + SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags); + SDValue IsFinite = DAG.getSetCC( + SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs, + Inf, ISD::SETOLT); + return IsFinite; +} + +/// If denormal handling is required return the scaled input to FLOG2, and the +/// check for denormal range. Otherwise, return null values. 
+std::pair<SDValue, SDValue> +AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, + SDValue Src, SDNodeFlags Flags) const { + if (!needsDenormHandlingF32(DAG, Src, Flags)) + return {}; + + MVT VT = MVT::f32; + const fltSemantics &Semantics = APFloat::IEEEsingle(); + SDValue SmallestNormal = + DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT); + + SDValue IsLtSmallestNormal = DAG.getSetCC( + SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, + SmallestNormal, ISD::SETOLT); + + SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT); + SDValue One = DAG.getConstantFP(1.0, SL, VT); + SDValue ScaleFactor = + DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags); + + SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags); + return {ScaledInput, IsLtSmallestNormal}; +} + SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals. // If we have to handle denormals, scale up the input and adjust the result. @@ -2468,32 +2543,11 @@ SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { DAG.getTargetConstant(0, SL, MVT::i32), Flags); } - bool NeedDenormHandling = - !Flags.hasApproximateFuncs() && !DAG.getTarget().Options.UnsafeFPMath && - !DAG.getTarget().Options.ApproxFuncFPMath && - !valueIsKnownNeverF32Denorm(Src) && - DAG.getDenormalMode(VT).Input != DenormalMode::PreserveSign; - - if (!NeedDenormHandling) + auto [ScaledInput, IsLtSmallestNormal] = + getScaledLogInput(DAG, SL, Src, Flags); + if (!ScaledInput) return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags); - const fltSemantics &Semantics = APFloat::IEEEsingle(); - SDValue SmallestNormal = - DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT); - - // Want to scale denormals up, but negatives and 0 work just as well on the - // scaled path. 
- SDValue IsLtSmallestNormal = DAG.getSetCC( - SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, - SmallestNormal, ISD::SETOLT); - - SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT); - SDValue One = DAG.getConstantFP(1.0, SL, VT); - SDValue ScaleFactor = - DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags); - - SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags); - SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags); SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT); @@ -2503,16 +2557,128 @@ SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags); } -SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, - double Log2BaseInverted) const { +static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X, + SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) { + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags); + return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags); +} + +SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, + SelectionDAG &DAG) const { + SDValue X = Op.getOperand(0); EVT VT = Op.getValueType(); + SDNodeFlags Flags = Op->getFlags(); + SDLoc DL(Op); - SDLoc SL(Op); - SDValue Operand = Op.getOperand(0); - SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand); + const bool IsLog10 = Op.getOpcode() == ISD::FLOG10; + assert(IsLog10 || Op.getOpcode() == ISD::FLOG); + + const auto &Options = getTargetMachine().Options; + if (VT == MVT::f16 || Flags.hasApproximateFuncs() || + Options.ApproxFuncFPMath || Options.UnsafeFPMath) { + + if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { + // Log and multiply in f32 is good enough for f16. + X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags); + } + + SDValue Lowered = LowerFLOGUnsafe( + X, DL, DAG, IsLog10 ? 
numbers::ln2 / numbers::ln10 : numbers::ln2, + Flags); + if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { + return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered, + DAG.getTargetConstant(0, DL, MVT::i32), Flags); + } + + return Lowered; + } + + auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags); + if (ScaledInput) + X = ScaledInput; + + SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags); + + SDValue R; + if (Subtarget->hasFastFMAF32()) { + // c+cc are ln(2)/ln(10) to more than 49 bits + const float c_log10 = 0x1.344134p-2f; + const float cc_log10 = 0x1.09f79ep-26f; + + // c + cc is ln(2) to more than 49 bits + const float c_log = 0x1.62e42ep-1f; + const float cc_log = 0x1.efa39ep-25f; + + SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); + SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); + + R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); + SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags); + SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags); + SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags); + R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags); + } else { + // ch+ct is ln(2)/ln(10) to more than 36 bits + const float ch_log10 = 0x1.344000p-2f; + const float ct_log10 = 0x1.3509f6p-18f; + + // ch + ct is ln(2) to more than 36 bits + const float ch_log = 0x1.62e000p-1f; + const float ct_log = 0x1.0bfbe8p-15f; + + SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT); + SDValue CT = DAG.getConstantFP(IsLog10 ? 
ct_log10 : ct_log, DL, VT); + + SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y); + SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32); + SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst); + SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); + SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); + + SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); + SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags); + SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags); + R = getMad(DAG, DL, VT, YH, CH, Mad1, Flags); + } + + const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && + (Flags.hasNoInfs() || Options.NoInfsFPMath); + + // TODO: Check if known finite from source value. + if (!IsFiniteOnly) { + SDValue IsFinite = getIsFinite(DAG, Y, Flags); + R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags); + } + + if (IsScaled) { + SDValue Zero = DAG.getConstantFP(0.0f, DL, VT); + SDValue ShiftK = + DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT); + SDValue Shift = + DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags); + R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags); + } + + return R; +} + +SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { + return LowerFLOGCommon(Op, DAG); +} + +// Do f32 fast math expansion for flog or flog10. This is accurate enough for a +// promoted f16 operation. +SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL, + SelectionDAG &DAG, + double Log2BaseInverted, + SDNodeFlags Flags) const { + EVT VT = Src.getValueType(); + unsigned LogOp = VT == MVT::f32 ? 
AMDGPUISD::LOG : ISD::FLOG2; + SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags); SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); - return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); + return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand, + Flags); } // exp2(M_LOG2E_F * f); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index f25480906ee520..200a7bc4eca878 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -60,9 +60,19 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, + SDNodeFlags Flags) const; + SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const; + std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG, + const SDLoc SL, SDValue Op, + SDNodeFlags Flags) const; + SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG, - double Log2BaseInverted) const; + SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, + double Log2BaseInverted, SDNodeFlags Flags) const; SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 37f4ae506afccc..4edb6e5a4feac6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1130,7 +1130,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, 
Exp2Ops.clampScalar(0, MinScalarFPTy, S32); Exp2Ops.scalarize(0); - auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10, G_FPOW}); + auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FPOW}); if (ST.has16BitInsts()) ExpOps.customFor({{S32}, {S16}}); else @@ -1151,6 +1151,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Log2Ops.scalarize(0) .lower(); + auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10}); + LogOps.customFor({S32, S16}); + LogOps.clampScalar(0, MinScalarFPTy, S32) + .scalarize(0); + // The 64-bit versions produce 32-bit results, but only on the SALU. getActionDefinitionsBuilder(G_CTPOP) .legalFor({{S32, S32}, {S32, S64}}) @@ -2012,9 +2017,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, case TargetOpcode::G_FLOG2: return legalizeFlog2(MI, B); case TargetOpcode::G_FLOG: - return legalizeFlog(MI, B, numbers::ln2); case TargetOpcode::G_FLOG10: - return legalizeFlog(MI, B, numbers::ln2 / numbers::ln10); + return legalizeFlogCommon(MI, B); case TargetOpcode::G_FEXP: return legalizeFExp(MI, B); case TargetOpcode::G_FPOW: @@ -3012,6 +3016,37 @@ static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI, return false; } +static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src, + unsigned Flags) { + return (Flags & MachineInstr::FmAfn) == 0 && + !MF.getTarget().Options.UnsafeFPMath && + !MF.getTarget().Options.ApproxFuncFPMath && + !valueIsKnownNeverF32Denorm(MF.getRegInfo(), Src) && + MF.getDenormalMode(APFloat::IEEEsingle()).Input != + DenormalMode::PreserveSign; +} + +std::pair<Register, Register> +AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, + unsigned Flags) const { + if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) + return {}; + + const LLT F32 = LLT::scalar(32); + auto SmallestNormal = B.buildFConstant( + F32, APFloat::getSmallestNormalized(APFloat::IEEEsingle())); + auto IsLtSmallestNormal = + B.buildFCmp(CmpInst::FCMP_OLT, 
LLT::scalar(1), Src, SmallestNormal); + + auto Scale32 = B.buildFConstant(F32, 0x1.0p+32); + auto One = B.buildFConstant(F32, 1.0); + auto ScaleFactor = + B.buildSelect(F32, IsLtSmallestNormal, Scale32, One, Flags); + auto ScaledInput = B.buildFMul(F32, Src, ScaleFactor, Flags); + + return {ScaledInput.getReg(0), IsLtSmallestNormal.getReg(0)}; +} + bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const { // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals. @@ -3025,8 +3060,6 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI, LLT Ty = B.getMRI()->getType(Dst); unsigned Flags = MI.getFlags(); - const MachineFunction &MF = B.getMF(); - if (Ty == LLT::scalar(16)) { const LLT F32 = LLT::scalar(32); // Nothing in half is a denormal when promoted to f32. @@ -3041,60 +3074,162 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI, assert(Ty == LLT::scalar(32)); - const fltSemantics &Flt = APFloat::IEEEsingle(); - - bool NeedDenormHandling = - !MI.getFlag(MachineInstr::FmAfn) && - !MF.getTarget().Options.UnsafeFPMath && - !MF.getTarget().Options.ApproxFuncFPMath && - !valueIsKnownNeverF32Denorm(*B.getMRI(), Src) && - MF.getDenormalMode(Flt).Input != DenormalMode::PreserveSign; - - if (!NeedDenormHandling) { - B.buildIntrinsic(Intrinsic::amdgcn_log, ArrayRef{Dst}, false) - .addUse(Src) - .setMIFlags(Flags); + auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags); + if (!ScaledInput) { + B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false) + .addUse(Src) + .setMIFlags(Flags); MI.eraseFromParent(); return true; } - auto SmallestNormal = - B.buildFConstant(Ty, APFloat::getSmallestNormalized(Flt)); - auto IsDenormOrZero = - B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src, SmallestNormal); - - auto Scale32 = B.buildFConstant(Ty, 0x1.0p+32); - auto One = B.buildFConstant(Ty, 1.0); - auto ScaleFactor = B.buildSelect(Ty, IsDenormOrZero, Scale32, One, Flags); - auto ScaledInput = 
B.buildFMul(Ty, Src, ScaleFactor, Flags); auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) - .addUse(ScaledInput.getReg(0)) - .setMIFlags(Flags); + .addUse(ScaledInput) + .setMIFlags(Flags); auto ThirtyTwo = B.buildFConstant(Ty, 32.0); auto Zero = B.buildFConstant(Ty, 0.0); - auto ResultOffset = B.buildSelect(Ty, IsDenormOrZero, ThirtyTwo, Zero, Flags); + auto ResultOffset = + B.buildSelect(Ty, IsLtSmallestNormal, ThirtyTwo, Zero, Flags); B.buildFSub(Dst, Log2, ResultOffset, Flags); MI.eraseFromParent(); return true; } -bool AMDGPULegalizerInfo::legalizeFlog( - MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const { +static Register getMad(MachineIRBuilder &B, LLT Ty, Register X, Register Y, + Register Z, unsigned Flags) { + auto FMul = B.buildFMul(Ty, X, Y, Flags); + return B.buildFAdd(Ty, FMul, Z, Flags).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, + MachineIRBuilder &B) const { + const bool IsLog10 = MI.getOpcode() == TargetOpcode::G_FLOG10; + assert(IsLog10 || MI.getOpcode() == TargetOpcode::G_FLOG); + + MachineRegisterInfo &MRI = *B.getMRI(); Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT Ty = B.getMRI()->getType(Dst); + Register X = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); + const LLT Ty = MRI.getType(X); + MachineFunction &MF = B.getMF(); - auto Log2Operand = B.buildFLog2(Ty, Src, Flags); - auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); + const LLT F32 = LLT::scalar(32); + const LLT F16 = LLT::scalar(16); + + const AMDGPUTargetMachine &TM = + static_cast<const AMDGPUTargetMachine &>(MF.getTarget()); + + if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) || + TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) { + const double Log2BaseInv = + IsLog10 ? 
numbers::ln2 / numbers::ln10 : numbers::ln2; + + if (Ty == F16 && !ST.has16BitInsts()) { + Register LogVal = MRI.createGenericVirtualRegister(F32); + auto PromoteSrc = B.buildFPExt(F32, X); + legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags); + B.buildFPTrunc(Dst, LogVal); + } else { + legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags); + } + + MI.eraseFromParent(); + return true; + } + + auto [ScaledInput, IsScaled] = getScaledLogInput(B, X, Flags); + if (ScaledInput) + X = ScaledInput; + + auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) + .addUse(X) + .setMIFlags(Flags); + + Register R; + if (ST.hasFastFMAF32()) { + // c+cc are ln(2)/ln(10) to more than 49 bits + const float c_log10 = 0x1.344134p-2f; + const float cc_log10 = 0x1.09f79ep-26f; + + // c + cc is ln(2) to more than 49 bits + const float c_log = 0x1.62e42ep-1f; + const float cc_log = 0x1.efa39ep-25f; + + auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log); + auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log); + + R = B.buildFMul(Ty, Y, C, Flags).getReg(0); + auto NegR = B.buildFNeg(Ty, R, Flags); + auto FMA0 = B.buildFMA(Ty, Y, C, NegR, Flags); + auto FMA1 = B.buildFMA(Ty, Y, CC, FMA0, Flags); + R = B.buildFAdd(Ty, R, FMA1, Flags).getReg(0); + } else { + // ch+ct is ln(2)/ln(10) to more than 36 bits + const float ch_log10 = 0x1.344000p-2f; + const float ct_log10 = 0x1.3509f6p-18f; + + // ch + ct is ln(2) to more than 36 bits + const float ch_log = 0x1.62e000p-1f; + const float ct_log = 0x1.0bfbe8p-15f; + + auto CH = B.buildFConstant(Ty, IsLog10 ? ch_log10 : ch_log); + auto CT = B.buildFConstant(Ty, IsLog10 ? 
ct_log10 : ct_log); + + auto MaskConst = B.buildConstant(Ty, 0xfffff000); + auto YH = B.buildAnd(Ty, Y, MaskConst); + auto YT = B.buildFSub(Ty, Y, YH, Flags); + auto YTCT = B.buildFMul(Ty, YT, CT, Flags); + + Register Mad0 = + getMad(B, Ty, YH.getReg(0), CT.getReg(0), YTCT.getReg(0), Flags); + Register Mad1 = getMad(B, Ty, YT.getReg(0), CH.getReg(0), Mad0, Flags); + R = getMad(B, Ty, YH.getReg(0), CH.getReg(0), Mad1, Flags); + } + + const bool IsFiniteOnly = + (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) && + (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath); + + if (!IsFiniteOnly) { + // Expand isfinite(x) => fabs(x) < inf + auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle())); + auto Fabs = B.buildFAbs(Ty, Y); + auto IsFinite = + B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags); + R = B.buildSelect(Ty, IsFinite, R, Y, Flags).getReg(0); + } + + if (ScaledInput) { + auto Zero = B.buildFConstant(Ty, 0.0); + auto ShiftK = + B.buildFConstant(Ty, IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f); + auto Shift = B.buildSelect(Ty, IsScaled, ShiftK, Zero, Flags); + B.buildFSub(Dst, R, Shift, Flags); + } else { + B.buildCopy(Dst, R); + } - B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags); MI.eraseFromParent(); return true; } +bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, + Register Src, + double Log2BaseInverted, + unsigned Flags) const { + LLT Ty = B.getMRI()->getType(Dst); + auto Log2Operand = Ty == LLT::scalar(16) + ? 
B.buildFLog2(Ty, Src, Flags) + : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) + .addUse(Src) + .setMIFlags(Flags); + auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); + B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags); + return true; +} + bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 6f2cd1e6a6f1f0..2863d36ce059d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -78,9 +78,14 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + std::pair<Register, Register> + getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const; + bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const; - bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B, - double Log2BaseInverted) const; + bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src, + double Log2BaseInverted, unsigned Flags) const; bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 4827f319b44417..10ce00fe68ca3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -61,6 +61,7 @@ class AMDGPUSubtarget { bool HasFminFmaxLegacy = true; bool EnablePromoteAlloca = false; bool HasTrigReducedRange = false; + bool FastFMAF32 = false; unsigned EUsPerCU = 4; unsigned MaxWavesPerEU = 10; unsigned LocalMemorySize = 0; @@ -198,6 +199,10 @@ class 
AMDGPUSubtarget { return HasTrigReducedRange; } + bool hasFastFMAF32() const { + return FastFMAF32; + } + bool isPromoteAllocaEnabled() const { return EnablePromoteAlloca; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index eff25cd8b3c665..4345ae9cc69145 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -65,7 +65,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, unsigned MaxPrivateElementSize = 0; // Possibly statically set by tablegen, but may want to be overridden. - bool FastFMAF32 = false; bool FastDenormalF32 = false; bool HalfRate64Ops = false; bool FullRate64Ops = false; @@ -328,10 +327,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return FP64; } - bool hasFastFMAF32() const { - return FastFMAF32; - } - bool hasHalfRate64Ops() const { return HalfRate64Ops; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir index 9bc02a199f409e..06fbbefd2c58ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir @@ -18,13 +18,22 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FLOG %0 $vgpr0 = COPY %1 @@ -47,13 +56,22 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan G_FLOG %0 $vgpr0 = COPY %1 @@ -77,20 +95,36 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - 
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] + ; 
CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FLOG %0 @@ -115,27 +149,50 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3E6EFA39E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]] + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: 
[[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]] ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT5]] - ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) + ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]] + ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]] + ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] + ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]] + ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]] + ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]] + ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) 
= G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FLOG %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir index 7feba0889bbfa9..28cd7834739789 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir @@ -18,13 +18,22 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FLOG10 %0 $vgpr0 = COPY %1 @@ -47,13 +56,22 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), 
[[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan G_FLOG10 %0 $vgpr0 = COPY %1 @@ -77,20 +95,36 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: 
[[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32) + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], 
[[SELECT5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FLOG10 %0 @@ -115,27 +149,50 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]] ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000 - ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]] - ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000 + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] + ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]] + ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]] + ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]] + ; 
CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]] + ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]] ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]] - ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]] - ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]] - ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]] + ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]] + ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]] + ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]] + ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]] + ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]] + ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]] + ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]] + ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), 
[[C1]], [[C2]] + ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]] ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]] - ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT5]] - ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32) + ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]] + ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]] + ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]] + ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]] + ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]] + ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]] + ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]] + ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FLOG10 %0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index b1d2f7f17cdac2..f58df9274dac3d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -15,60 +15,88 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-LABEL: s_log_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: 
v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x3f317217 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; 
SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log_f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; 
VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 ; VI-SDAG-NEXT: s_endpgm ; @@ -77,57 +105,85 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; VI-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log_f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3f317217 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3377d1cf +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3 +; GFX900-SDAG-NEXT: s_mov_b32 s0, 
0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log_f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; 
GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log_f32: @@ -137,16 +193,23 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v1, v0 :: v_dual_mov_b32 v1, 0 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s3 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] ; 
GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm ; @@ -157,22 +220,29 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, s3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], 
KC1[] +; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD @@ -181,18 +251,30 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T1.W, KC0[2].Z, PV.W, -; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: ADD * T0.W, PS, -T0.W, -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45) +; R600-NEXT: AND_INT * T1.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T2.W, T0.X, -PV.W, +; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T0.X, PV.W, -PS, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log_f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X ; CM-NEXT: CF_END ; CM-NEXT: PAD @@ -201,16 +283,27 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, 
float %in) { ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, ; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; CM-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x, -; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, PV.W, -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: ADD * T0.W, PV.X, -T0.Z, -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; CM-NEXT: LOG_IEEE T0.X, T1.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: AND_INT * T1.W, PV.X, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T2.W, T0.X, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; CM-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W, +; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|, +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call float @llvm.log.f32(float %in) @@ -225,162 +318,259 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: 
v_mov_b32_e32 v3, 0x4f800000 -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s8, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v1 -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s3, 0x3f317217 ; SI-SDAG-NEXT: s_mov_b32 s4, s0 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9 +; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log_v2f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s4, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; SI-GISEL-NEXT: 
v_mul_f32_e32 v4, 0x3f317217, v2 +; SI-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4 +; SI-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 +; SI-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log_v2f32: ; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: 
v_log_f32_e32 v2, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 ; 
VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log_v2f32: ; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x800000 +; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v4, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; 
VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log_v2f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3377d1cf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; GFX900-SDAG-NEXT: 
v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 +; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX900-SDAG-NEXT: 
s_endpgm ; ; GFX900-GISEL-LABEL: s_log_v2f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 
v2, v2, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log_v2f32: @@ -390,19 +580,29 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s3, v2 :: v_dual_mul_f32 v3, s2, v3 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 +; 
GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v3, v1 :: v_dual_mov_b32 v3, 0 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3f317218, v0 :: v_dual_mul_f32 v0, 0x3f317218, v2 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5 ; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm @@ -416,25 +616,35 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { 
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v4, v0, 0x3f317217, -v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: 
v_dual_cndmask_b32 v1, v1, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_v2f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XY, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: @@ -446,25 +656,45 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Z, KC0[2].W, PS, -; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: MUL_IEEE T3.W, KC0[2].W, PS, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; R600-NEXT: CNDE T1.Z, T1.W, 0.0, literal.x, -; R600-NEXT: ADD T0.W, PS, -PV.W, -; R600-NEXT: LOG_IEEE * T0.X, PV.Z, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: ADD * T0.W, PS, -PV.Z, -; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45) +; R600-NEXT: AND_INT T2.W, PS, literal.x, +; R600-NEXT: LOG_IEEE * T0.Y, PV.W, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T3.W, T0.X, -PV.W, +; R600-NEXT: AND_INT * T4.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T5.W, T0.Y, -PS, +; R600-NEXT: MUL_IEEE * T6.W, PV.W, literal.x, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T6.W, 
T2.W, literal.x, PS, +; R600-NEXT: MUL_IEEE * T7.W, PV.W, literal.x, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T7.W, T4.W, literal.x, PS, +; R600-NEXT: MULADD_IEEE * T3.W, T3.W, literal.y, PV.W, BS:VEC_021/SCL_122 +; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01) +; R600-NEXT: MULADD_IEEE T0.Z, T2.W, literal.x, PS, +; R600-NEXT: SETGT T2.W, literal.y, |T0.X|, +; R600-NEXT: MULADD_IEEE * T3.W, T5.W, literal.x, PV.W, BS:VEC_021/SCL_122 +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: MULADD_IEEE T1.Y, T4.W, literal.x, PS, +; R600-NEXT: SETGT T1.Z, literal.y, |T0.Y|, +; R600-NEXT: CNDE T2.W, PV.W, T0.X, PV.Z, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T2.Y, PV.W, -PS, +; R600-NEXT: CNDE T0.W, PV.Z, T0.Y, PV.Y, +; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.x, +; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log_v2f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: ALU 47, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: @@ -473,28 +703,48 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x, ; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, PV.Z, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; 
CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, -; CM-NEXT: ADD T0.Z, PV.X, -T1.Z, -; CM-NEXT: MUL_IEEE * T0.W, KC0[2].W, T0.Y, -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T0.Y, T0.Z, literal.x, -; CM-NEXT: ADD * T0.W, PV.X, -T1.Y, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: CNDE T1.Z, PV.W, 1.0, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].X, PV.Z, +; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: AND_INT T0.Z, PV.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[2].W, T1.Z, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: ADD T1.Z, T0.X, -T0.Z, +; CM-NEXT: AND_INT * T2.W, PV.Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD T2.Z, T0.Y, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T3.Z, T0.Z, literal.x, PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T4.Z, T2.W, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE * T3.W, T1.Z, literal.y, PV.Z, +; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01) +; CM-NEXT: MULADD_IEEE T1.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: SETGT T0.Z, literal.y, |T0.X|, +; CM-NEXT: MULADD_IEEE * 
T3.W, T2.Z, literal.x, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: MULADD_IEEE T1.X, T2.W, literal.x, PV.W, +; CM-NEXT: SETGT T2.Y, literal.y, |T0.Y|, +; CM-NEXT: CNDE T0.Z, PV.Z, T0.X, PV.Y, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD T1.Y, PV.Z, -PV.W, +; CM-NEXT: CNDE T0.Z, PV.Y, T0.Y, PV.X, +; CM-NEXT: CNDE * T0.W, T1.W, 0.0, literal.x, +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD * T1.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <2 x float> @llvm.log.v2f32(<2 x float> %in) store <2 x float> %result, ptr addrspace(1) %out @@ -504,215 +754,354 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; SI-SDAG-LABEL: s_log_v3f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v2, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; SI-SDAG-NEXT: s_mov_b32 s11, 0x3377d1cf +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; 
SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s9, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 -; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; SI-SDAG-NEXT: 
v_sub_f32_e32 v0, v0, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log_v3f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; SI-GISEL-NEXT: 
v_mul_f32_e32 v4, s5, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v4, s9, v4 ; SI-GISEL-NEXT: v_log_f32_e32 v4, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf +; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], 
|v2|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log_v3f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: s_mov_b32 s8, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; 
VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 +; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v5 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v0, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v5 +; VI-SDAG-NEXT: 
v_mul_f32_e32 v5, 0x3805fdf4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s6 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s7 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log_v3f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, 
v3 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s5, v3 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 -; VI-GISEL-NEXT: v_log_f32_e32 v4, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; 
VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s8 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s9 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log_v3f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3377d1cf ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s6, -v4 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 ; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; 
GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v3 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-SDAG-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s6, -v6 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s7, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s6, -v4 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s7, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 
v4, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log_v3f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1] 
; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf +; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log_v3f32: @@ -725,26 +1114,47 @@ define 
amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s4 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, s6, v3 :: v_dual_mul_f32 v4, s5, v4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s6, v0 :: v_dual_mul_f32 v1, s5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s4, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v4, v1 :: v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; 
GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v3, v5, v2 :: v_dual_mul_f32 v2, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3f317218, v1 :: v_dual_mul_f32 v0, 0x3f317218, v3 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2 +; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_cndmask_b32 v1, v1, v4 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6 ; GFX1100-SDAG-NEXT: global_store_b96 v4, 
v[0:2], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm @@ -763,32 +1173,52 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_sub_f32 v1, v1, v4 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v1, 0x3f317218, v1 :: v_dual_mul_f32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fma_f32 v6, v0, 0x3f317217, -v3 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2 +; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_mov_b32 v3, 0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 ; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_v3f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: ALU clause 
starting at 4: ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z, @@ -799,80 +1229,137 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Y, KC0[3].Y, PS, -; R600-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x, -; R600-NEXT: SETGT T0.W, literal.y, KC0[3].W, +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Y, PS, +; R600-NEXT: SETGT T3.W, literal.x, KC0[3].W, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) -; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, -; R600-NEXT: CNDE T1.Z, PV.W, 1.0, literal.y, -; R600-NEXT: ADD T1.W, PS, -PV.Z, -; R600-NEXT: LOG_IEEE * T0.X, PV.Y, -; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: MUL_IEEE T1.W, KC0[3].W, PV.Z, -; R600-NEXT: ADD * T2.W, PS, -PV.Y, -; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PS, literal.x, -; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.y, -; R600-NEXT: LOG_IEEE * T0.Z, PV.W, -; R600-NEXT: 1060205080(6.931472e-01), 1107296256(3.200000e+01) -; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, -; R600-NEXT: ADD * T0.W, PS, -PV.W, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T2.X, PV.W, literal.x, -; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, -; R600-NEXT: 1060205080(6.931472e-01), 8(1.121039e-44) +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: AND_INT T1.Z, PS, literal.x, +; R600-NEXT: CNDE T2.W, PV.W, 1.0, literal.y, +; R600-NEXT: LOG_IEEE * T0.Y, PV.Z, +; R600-NEXT: -4096(nan), 1333788672(4.294967e+09) +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W, +; R600-NEXT: ADD T2.W, T0.X, -PV.Z, +; R600-NEXT: AND_INT * T4.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T2.Z, T0.Y, -PS, +; R600-NEXT: MUL_IEEE T5.W, PV.W, 
literal.x, +; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T3.Z, T1.Z, literal.x, PV.W, +; R600-NEXT: AND_INT T5.W, PS, literal.y, +; R600-NEXT: MUL_IEEE * T6.W, PV.Z, literal.x, +; R600-NEXT: 939916788(3.194618e-05), -4096(nan) +; R600-NEXT: MULADD_IEEE T4.Z, T4.W, literal.x, PS, +; R600-NEXT: ADD T6.W, T0.Z, -PV.W, +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.y, PV.Z, BS:VEC_021/SCL_122 +; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01) +; R600-NEXT: MULADD_IEEE T1.Y, T1.Z, literal.x, PS, +; R600-NEXT: SETGT T1.Z, literal.y, |T0.X|, +; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.z, +; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Z, BS:VEC_021/SCL_122 +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.X, T4.W, literal.x, PS, +; R600-NEXT: SETGT T2.Y, literal.y, |T0.Y|, +; R600-NEXT: MULADD_IEEE T2.Z, T5.W, literal.z, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE T2.W, PV.Z, T0.X, PV.Y, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.w, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: 939916788(3.194618e-05), 1102148120(2.218071e+01) +; R600-NEXT: ADD T1.Y, PV.W, -PS, +; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.x, PV.Z, +; R600-NEXT: CNDE T0.W, PV.Y, T0.Y, PV.X, +; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.y, +; R600-NEXT: 1060204544(6.931152e-01), 1102148120(2.218071e+01) +; R600-NEXT: ADD T1.X, PV.W, -PS, +; R600-NEXT: MULADD_IEEE T0.W, T5.W, literal.x, PV.Z, +; R600-NEXT: SETGT * T1.W, literal.y, |T0.Z|, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, +; R600-NEXT: CNDE T0.W, PS, T0.Z, PV.W, +; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.y, +; R600-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; R600-NEXT: 8(1.121039e-44), 
0(0.000000e+00) ; R600-NEXT: LSHR * T3.X, PV.W, literal.x, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log_v3f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T1.X +; CM-NEXT: ALU 68, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4.X, T1.X ; CM-NEXT: CF_END ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W, +; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y, ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x, +; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z, -; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y, +; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Z, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x, -; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.y, -; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.x, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, PV.Y, -; CM-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y, T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: CNDE T1.X, T1.W, 1.0, literal.x, -; CM-NEXT: CNDE T2.Y, T0.Z, 0.0, literal.y, -; CM-NEXT: ADD T0.Z, PV.Y, -T1.Z, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Z, T1.Y, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y, T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T2.X, T0.Z, literal.x, -; CM-NEXT: ADD T0.Y, PV.Y, -T2.Y, -; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, 
literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T1.X, -; CM-NEXT: 1060205080(6.931472e-01), 8(1.121039e-44) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W, T0.W, -; CM-NEXT: LSHR T1.X, T0.Z, literal.x, -; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.y, -; CM-NEXT: ADD * T0.W, PV.W, -T0.X, -; CM-NEXT: 2(2.802597e-45), 1060205080(6.931472e-01) -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x, +; CM-NEXT: AND_INT T0.Z, PV.X, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].W, T0.Y, +; CM-NEXT: 1333788672(4.294967e+09), -4096(nan) +; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: ADD T2.Y, T0.X, -T0.Z, +; CM-NEXT: AND_INT T2.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.Y, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T1.X, T2.W, +; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W, +; CM-NEXT: ADD T1.Y, T0.Y, -T2.Z, +; CM-NEXT: AND_INT T3.Z, PV.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, T2.Y, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: -4096(nan), 939916788(3.194618e-05) +; CM-NEXT: MULADD_IEEE T3.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: ADD T4.Z, T1.X, -PV.Z, +; CM-NEXT: MUL_IEEE * T2.W, PV.Y, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T4.Y, T2.Z, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T5.Z, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE * T2.W, T2.Y, literal.y, PV.Y, +; CM-NEXT: 939916788(3.194618e-05), 
1060204544(6.931152e-01) +; CM-NEXT: MULADD_IEEE T2.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T0.Z, T3.Z, literal.y, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.x, PV.Y, +; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05) +; CM-NEXT: SETGT T2.X, literal.x, |T0.X|, +; CM-NEXT: MULADD_IEEE T1.Y, T2.Z, literal.y, PV.W, +; CM-NEXT: SETGT T2.Z, literal.x, |T0.Y|, +; CM-NEXT: MULADD_IEEE * T2.W, T4.Z, literal.y, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: 2139095040(INF), 1060204544(6.931152e-01) +; CM-NEXT: MULADD_IEEE T3.X, T3.Z, literal.x, PV.W, +; CM-NEXT: SETGT T3.Y, literal.y, |T1.X|, +; CM-NEXT: CNDE T0.Z, PV.Z, T0.Y, PV.Y, +; CM-NEXT: CNDE * T1.W, T1.W, 0.0, literal.z, +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD T4.X, PV.Z, -PV.W, +; CM-NEXT: CNDE T0.Y, PV.Y, T1.X, PV.X, +; CM-NEXT: CNDE T0.Z, T1.Z, 0.0, literal.x, +; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; CM-NEXT: 1102148120(2.218071e+01), 8(1.121039e-44) +; CM-NEXT: LSHR T1.X, PV.W, literal.x, +; CM-NEXT: ADD T0.Y, PV.Y, -PV.Z, +; CM-NEXT: CNDE T0.Z, T2.X, T0.X, T2.Y, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.y, +; CM-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <3 x float> @llvm.log.v3f32(<3 x float> %in) store <3 x float> %result, ptr addrspace(1) %out @@ -884,255 +1371,433 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-SDAG-LABEL: s_log_v4f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; 
SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s12, 0x3377d1cf ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s11, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v4, v3 +; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v3, 0, v5, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2 +; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 ; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v1 -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, 
-1 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log_v4f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000 +; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v5, 
v0, s0, -v1 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; SI-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 +; SI-GISEL-NEXT: v_log_f32_e32 v9, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf +; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 
0x3f317217, v3 +; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 +; SI-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log_v4f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s7, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; 
VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v4, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v6, v5 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc ; VI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v7, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-SDAG-NEXT: 
v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3f317000, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s8 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, s9 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log_v4f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; 
VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x800000 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 
1.0, v2, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v7 +; VI-GISEL-NEXT: v_log_f32_e32 v7, v7 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x3805fdf4, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317000, v8 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, 
s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s8 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s9 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log_v4f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x3377d1cf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX900-SDAG-NEXT: 
v_mul_f32_e32 v6, s6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x41b17218 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s10, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v2 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-SDAG-NEXT: global_store_dwordx4 v7, 
v[0:3], s[2:3] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s7, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s10, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v7 +; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s7, -v5 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s10, v8 +; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s7, -v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s10, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 +; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log_v4f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217 +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s6, v9 +; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 ; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 
0x3f317218, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log_v4f32: @@ -1146,32 +1811,52 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v4, s7, v4 :: v_dual_mul_f32 v5, s6, v5 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s5, v6 :: v_dual_mul_f32 v7, s4, v7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s7, v0 :: v_dual_mul_f32 v1, s6, v1 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s5, v2 :: v_dual_mul_f32 v3, s4, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s9 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v4, v0 :: v_dual_sub_f32 v1, v5, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v4, v6, v2 :: v_dual_sub_f32 v5, v7, v3 -; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317218, v1 :: v_dual_mul_f32 v1, 0x3f317218, v4 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v5 -; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5 +; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 +; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1 +; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15 +; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm ; @@ -1191,133 +1876,226 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, 
v2 :: v_dual_mul_f32 v3, s7, v3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s3 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317218, v2 :: v_dual_mul_f32 v3, 0x3f317218, v3 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v10, v0, 0x3f317217, -v5 +; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 +; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 +; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15 ; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_v4f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 38, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 +; R600-NEXT: ALU 76, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X, -; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W, +; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z, +; R600-NEXT: SETGT * T1.W, literal.x, KC0[4].X, ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; 
R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W, -; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z, +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Z, PV.W, +; R600-NEXT: SETGT T2.W, literal.x, KC0[3].W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y, ; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS, -; R600-NEXT: SETGT T0.Y, literal.x, KC0[3].Y, -; R600-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, -; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z, -; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, -; R600-NEXT: 8388608(1.175494e-38), 1107296256(3.200000e+01) +; R600-NEXT: MUL_IEEE T1.Z, KC0[4].X, PS, +; R600-NEXT: CNDE T3.W, PV.W, 1.0, literal.x, +; R600-NEXT: LOG_IEEE * T0.X, PV.Z, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W, -; R600-NEXT: ADD T1.Y, PS, -PV.Z, -; R600-NEXT: CNDE T0.Z, T1.W, 0.0, literal.x, -; R600-NEXT: CNDE T0.W, PV.Y, 1.0, literal.y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T2.X, KC0[3].Y, PV.W, -; R600-NEXT: CNDE T2.Y, T2.W, 0.0, literal.x, -; R600-NEXT: ADD T0.Z, PS, -PV.Z, -; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01) -; R600-NEXT: CNDE T1.Y, T0.Y, 0.0, literal.x, -; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.y, -; R600-NEXT: ADD T1.W, PS, -PV.Y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: ADD * T1.W, PS, -PV.Y, -; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45) +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W, +; R600-NEXT: SETGT T3.W, 
literal.x, KC0[3].Y, +; R600-NEXT: LOG_IEEE * T0.Y, PV.Z, +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: AND_INT T1.Y, PS, literal.x, +; R600-NEXT: AND_INT T1.Z, T0.X, literal.x, +; R600-NEXT: CNDE T4.W, PV.W, 1.0, literal.y, +; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, +; R600-NEXT: -4096(nan), 1333788672(4.294967e+09) +; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.W, +; R600-NEXT: ADD T2.Z, T0.X, -PV.Z, +; R600-NEXT: AND_INT T4.W, PS, literal.x, +; R600-NEXT: ADD * T5.W, T0.Y, -PV.Y, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.Y, PS, literal.x, +; R600-NEXT: ADD T3.Z, T0.Z, -PV.W, +; R600-NEXT: MUL_IEEE T6.W, PV.Z, literal.x, +; R600-NEXT: LOG_IEEE * T1.X, PV.Y, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.x, PV.W, +; R600-NEXT: MUL_IEEE T4.Z, PV.Z, literal.x, +; R600-NEXT: AND_INT T6.W, PS, literal.y, +; R600-NEXT: MULADD_IEEE * T7.W, T1.Y, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 939916788(3.194618e-05), -4096(nan) +; R600-NEXT: MULADD_IEEE T3.Y, T5.W, literal.x, PS, +; R600-NEXT: ADD T5.Z, T1.X, -PV.W, +; R600-NEXT: MULADD_IEEE T5.W, T4.W, literal.y, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05) +; R600-NEXT: MULADD_IEEE T2.X, T1.Z, literal.x, PS, +; R600-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.x, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE T1.Z, PV.Z, literal.y, +; R600-NEXT: MULADD_IEEE T5.W, T1.Y, literal.x, PV.Y, +; R600-NEXT: SETGT * T7.W, literal.z, |T0.Y|, +; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05) +; R600-NEXT: 2139095040(INF), 0(0.000000e+00) +; R600-NEXT: CNDE T3.X, PS, T0.Y, PV.W, +; R600-NEXT: CNDE T0.Y, T1.W, 0.0, literal.x, +; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.y, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MULADD_IEEE T1.W, T4.W, literal.z, PV.Y, BS:VEC_201 +; R600-NEXT: SETGT * T4.W, literal.w, |T0.Z|, +; 
R600-NEXT: 1102148120(2.218071e+01), 939916788(3.194618e-05) +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: SETGT T4.X, literal.x, |T0.X|, +; R600-NEXT: CNDE T1.Y, PS, T0.Z, PV.W, +; R600-NEXT: CNDE T0.Z, T2.W, 0.0, literal.y, +; R600-NEXT: MULADD_IEEE T1.W, T5.Z, literal.z, PV.Z, +; R600-NEXT: ADD * T2.W, PV.X, -PV.Y, +; R600-NEXT: 2139095040(INF), 1102148120(2.218071e+01) +; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T3.X, T6.W, literal.x, PV.W, +; R600-NEXT: SETGT T0.Y, literal.y, |T1.X|, +; R600-NEXT: ADD T2.Z, PV.Y, -PV.Z, +; R600-NEXT: CNDE T1.W, PV.X, T0.X, T2.X, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T2.Y, PV.W, -PS, +; R600-NEXT: CNDE T0.W, PV.Y, T1.X, PV.X, +; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.x, +; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log_v4f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 50, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X +; CM-NEXT: ALU 84, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: SETGT T0.Z, literal.x, KC0[4].X, ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y, ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x, -; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W, +; CM-NEXT: SETGT * T1.W, literal.y, KC0[4].X, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) ; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x, -; CM-NEXT: SETGT T1.Y, literal.y, KC0[3].Z, -; CM-NEXT: CNDE T0.Z, T0.Z, 0.0, literal.z, -; 
CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, PV.Z, +; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x, +; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Y, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: CNDE T1.X, T1.Z, 1.0, literal.x, +; CM-NEXT: AND_INT T2.Y, PV.Y, literal.y, +; CM-NEXT: MUL_IEEE T2.Z, KC0[3].W, T1.Y, +; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, T0.X, +; CM-NEXT: 1333788672(4.294967e+09), -4096(nan) +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T1.X (MASKED), T2.Z, +; CM-NEXT: LOG_IEEE T1.Y, T2.Z, +; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.Z, +; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.Z, +; CM-NEXT: ADD T2.X, T0.Y, -T2.Y, +; CM-NEXT: AND_INT T3.Y, PV.Y, literal.x, +; CM-NEXT: AND_INT T2.Z, T0.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.X, +; CM-NEXT: -4096(nan), 0(0.000000e+00) ; CM-NEXT: LOG_IEEE T1.X, T2.W, ; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W, ; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W, ; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W, -; CM-NEXT: ADD T1.X, PV.X, -T0.Z, -; CM-NEXT: CNDE T2.Y, T1.Y, 1.0, literal.x, -; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.y, -; CM-NEXT: MUL_IEEE * T1.W, KC0[3].W, T0.X, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X, T1.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, -; CM-NEXT: ADD T0.X, PV.X, -T0.Z, -; CM-NEXT: CNDE T1.Y, T1.Y, 0.0, literal.x, -; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, T2.Y, -; CM-NEXT: MUL_IEEE * T1.W, T1.X, literal.y, -; CM-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01) -; CM-NEXT: 
LOG_IEEE T0.X (MASKED), T0.Z, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.Z, -; CM-NEXT: LOG_IEEE T0.Z, T0.Z, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z, -; CM-NEXT: CNDE T1.X, T0.W, 0.0, literal.x, -; CM-NEXT: ADD T1.Y, PV.Z, -T1.Y, -; CM-NEXT: MUL_IEEE T1.Z, T0.X, literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T0.Y, BS:VEC_021/SCL_122 -; CM-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T1.Y, T1.Y, literal.x, -; CM-NEXT: ADD * T0.W, PV.X, -T1.X, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE * T1.X, PV.W, literal.x, -; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00) +; CM-NEXT: ADD T3.X, T0.X, -T2.Z, +; CM-NEXT: ADD T4.Y, T1.Y, -T3.Y, +; CM-NEXT: AND_INT T3.Z, PV.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: -4096(nan), 939916788(3.194618e-05) +; CM-NEXT: MULADD_IEEE T4.X, T2.Y, literal.x, PV.W, +; CM-NEXT: ADD T5.Y, T1.X, -PV.Z, +; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, PV.X, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T5.X, T2.Z, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T6.Y, T3.Y, literal.x, PV.Z, +; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE * T2.W, T2.X, literal.y, PV.X, +; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01) +; CM-NEXT: MULADD_IEEE T2.X, T2.Y, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.Z, +; CM-NEXT: MULADD_IEEE T4.Z, T4.Y, literal.x, PV.Y, BS:VEC_120/SCL_212 +; CM-NEXT: MULADD_IEEE * T2.W, T3.X, literal.x, PV.X, +; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05) +; CM-NEXT: MULADD_IEEE T3.X, T2.Z, literal.x, PV.W, +; CM-NEXT: SETGT T4.Y, literal.y, |T0.X|, +; CM-NEXT: MULADD_IEEE T2.Z, T3.Y, literal.x, PV.Z, +; CM-NEXT: SETGT * T2.W, 
literal.y, |T1.Y|, +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: CNDE T4.X, PV.W, T1.Y, PV.Z, +; CM-NEXT: CNDE T1.Y, PV.Y, T0.X, PV.X, +; CM-NEXT: CNDE T2.Z, T1.W, 0.0, literal.x, +; CM-NEXT: MULADD_IEEE * T1.W, T5.Y, literal.y, T2.Y, +; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01) +; CM-NEXT: CNDE T0.X, T0.Z, 0.0, literal.x, +; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.W, BS:VEC_120/SCL_212 +; CM-NEXT: SETGT T0.Z, literal.z, |T1.X|, +; CM-NEXT: ADD * T2.W, PV.Y, -PV.Z, +; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01) +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: SETGT T3.X, literal.x, |T0.Y|, +; CM-NEXT: CNDE T1.Y, PV.Z, T1.X, PV.Y, +; CM-NEXT: ADD T2.Z, T4.X, -PV.X, +; CM-NEXT: CNDE * T1.W, T1.Z, 0.0, literal.y, +; CM-NEXT: 2139095040(INF), 1102148120(2.218071e+01) +; CM-NEXT: ADD T2.Y, PV.Y, -PV.W, +; CM-NEXT: CNDE T0.Z, PV.X, T0.Y, T2.X, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD * T2.X, PV.Z, -PV.W, ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <4 x float> @llvm.log.v4f32(<4 x float> %in) @@ -1326,49 +2104,170 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { } define float @v_log_f32(float %in) { -; GFX689-SDAG-LABEL: v_log_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 
s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 
vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32: +; 
GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32: @@ -1376,14 +2275,21 @@ define float @v_log_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32: @@ -1400,49 +2306,170 @@ define float @v_log_f32(float %in) { } define float @v_log_fabs_f32(float %in) { -; GFX689-SDAG-LABEL: v_log_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log_fabs_f32: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-LABEL: v_log_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, 
s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_fabs_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_fabs_f32: @@ -1452,12 +2479,19 @@ define float @v_log_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fabs_f32: @@ -1475,49 +2509,170 @@ define float @v_log_fabs_f32(float %in) { } define float @v_log_fneg_fabs_f32(float %in) { -; GFX689-SDAG-LABEL: v_log_fneg_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: 
v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_fneg_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_fneg_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_fneg_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, 
v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_fneg_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_fneg_fabs_f32: @@ -1527,12 +2682,19 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fneg_fabs_f32: @@ -1551,49 +2713,170 @@ define float @v_log_fneg_fabs_f32(float %in) { } define float @v_log_fneg_f32(float %in) { -; GFX689-SDAG-LABEL: v_log_fneg_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_fneg_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_fneg_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_fneg_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; 
VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; 
VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_fneg_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_fneg_f32: @@ -1603,12 +2886,19 @@ define float @v_log_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fneg_f32: @@ -1626,50 +2916,20 @@ define float @v_log_fneg_f32(float %in) { } define float @v_log_f32_fast(float %in) { -; GFX689-SDAG-LABEL: v_log_f32_fast: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: 
s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_fast: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log_f32_fast: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_fast: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_fast: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log_f32_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_fast: ; R600: ; %bb.0: @@ -1743,49 +3003,170 @@ define float @v_log_f32_approx_fn_attr(float %in) 
"approx-func-fp-math"="true" { } define float @v_log_f32_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log_f32_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, 
s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: 
v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: 
v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_ninf: @@ -1793,14 +3174,21 @@ define float @v_log_f32_ninf(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: 
v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_ninf: @@ -1817,50 +3205,20 @@ define float @v_log_f32_ninf(float %in) { } define float @v_log_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_log_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: 
v_log_f32_afn: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_afn: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn: ; R600: ; %bb.0: @@ -1905,50 +3263,20 @@ define float @v_log_f32_afn_daz(float %in) #0 { } define float @v_log_f32_afn_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log_f32_afn_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 
0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_afn_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_afn_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 
s[30:31] +; GFX1100-LABEL: v_log_f32_afn_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn_dynamic: ; R600: ; %bb.0: @@ -1964,50 +3292,20 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_fabs_log_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_fabs_log_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: 
v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_fabs_log_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_fabs_log_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log_f32_afn: ; R600: ; %bb.0: @@ -2024,20 +3322,125 @@ define float @v_fabs_log_f32_afn(float %in) { } define float @v_log_f32_daz(float %in) #0 { -; GFX689-LABEL: v_log_f32_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 +; 
VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_daz: ; R600: ; %bb.0: @@ -2053,49 +3456,170 @@ define float @v_log_f32_daz(float %in) #0 { } define float @v_log_f32_nnan(float %in) { -; GFX689-SDAG-LABEL: v_log_f32_nnan: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX689-GISEL-LABEL: v_log_f32_nnan: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_nnan: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_nnan: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX900-GISEL-LABEL: v_log_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_nnan: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_nnan: @@ -2103,14 +3627,21 @@ define float @v_log_f32_nnan(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; 
GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan: @@ -2127,20 +3658,125 @@ define float @v_log_f32_nnan(float %in) { } define float @v_log_f32_nnan_daz(float %in) #0 { -; GFX689-LABEL: v_log_f32_nnan_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_nnan_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_nnan_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_nnan_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, 
|v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_nnan_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_nnan_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_nnan_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_nnan_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_nnan_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 
0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_daz: ; R600: ; %bb.0: @@ -2156,49 +3792,164 @@ define float @v_log_f32_nnan_daz(float %in) #0 { } define float @v_log_f32_nnan_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log_f32_nnan_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_nnan_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_nnan_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: 
s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_nnan_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: 
s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: 
v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_nnan_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_nnan_dynamic: @@ -2206,14 +3957,21 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_dynamic: @@ -2230,20 +3988,125 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { } define float @v_log_f32_ninf_daz(float %in) #0 { -; GFX689-LABEL: v_log_f32_ninf_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; 
SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_ninf_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: 
v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_ninf_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; 
GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_ninf_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_ninf_daz: ; R600: ; %bb.0: @@ -2259,169 +4122,466 @@ define float @v_log_f32_ninf_daz(float %in) #0 { } define float @v_log_f32_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log_f32_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_ninf_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log_f32_ninf_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log_f32_ninf_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.log.f32(float %in) + ret float %result +} + +define float @v_log_f32_nnan_ninf(float %in) { +; SI-SDAG-LABEL: v_log_f32_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 
0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic: +; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic: +; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 +; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; R600-LABEL: v_log_f32_ninf_dynamic: +; R600-LABEL: v_log_f32_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; -; CM-LABEL: v_log_f32_ninf_dynamic: +; CM-LABEL: v_log_f32_nnan_ninf: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD - %result = call ninf float @llvm.log.f32(float %in) + %result = call nnan ninf float @llvm.log.f32(float %in) ret float %result } -define float @v_log_f32_nnan_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log_f32_nnan_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_nnan_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +define float @v_log_f32_nnan_ninf_daz(float %in) #0 { +; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v1, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 +; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf: +; SI-GISEL-LABEL: v_log_f32_nnan_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 +; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log_f32_nnan_ninf_daz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f32_e32 v0, v0 +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v2 +; VI-NEXT: v_mac_f32_e32 v0, 0x3805fdf4, v1 +; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v2 +; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v1 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_daz: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf: +; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 
| instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; R600-LABEL: v_log_f32_nnan_ninf: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_log_f32_nnan_ninf: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call nnan ninf float @llvm.log.f32(float %in) - ret float %result -} - -define float @v_log_f32_nnan_ninf_daz(float %in) #0 { -; GFX689-LABEL: v_log_f32_nnan_ninf_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: v_log_f32_nnan_ninf_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] -; ; R600-LABEL: v_log_f32_nnan_ninf_daz: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2436,49 +4596,143 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 { } define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 
0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; 
SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; 
GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -2486,14 
+4740,18 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_ninf_dynamic: @@ -2539,49 +4797,164 @@ define float @v_log_f32_fast_daz(float %in) #0 { } define float @v_log_f32_dynamic_mode(float %in) #1 { -; GFX689-SDAG-LABEL: v_log_f32_dynamic_mode: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_dynamic_mode: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_dynamic_mode: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_dynamic_mode: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 
0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: 
v_log_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_dynamic_mode: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| 
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_dynamic_mode: @@ -2589,14 +4962,21 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 
0x41b17218, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_dynamic_mode: @@ -2613,35 +4993,142 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { } define float @v_log_f32_undef() { -; GFX689-SDAG-LABEL: v_log_f32_undef: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_undef: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_undef: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_undef: +; 
SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_undef: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_undef: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 
s4, v1 +; VI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_undef: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_undef: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_undef: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_undef: @@ -2650,14 +5137,21 @@ define float @v_log_f32_undef() { ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_undef: @@ -2665,47 +5159,156 @@ define float @v_log_f32_undef() { ; R600-NEXT: CF_END ; R600-NEXT: PAD ; -; CM-LABEL: v_log_f32_undef: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call float @llvm.log.f32(float undef) - ret float %result -} - -define float @v_log_f32_0() { -; GFX689-SDAG-LABEL: v_log_f32_0: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_0: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX689-GISEL-NEXT: 
v_mul_f32_e32 v1, 0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; CM-LABEL: v_log_f32_undef: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.log.f32(float undef) + ret float %result +} + +define float @v_log_f32_0() { +; SI-SDAG-LABEL: v_log_f32_0: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, 0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_0: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; 
SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_0: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, 0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_0: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; VI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; 
VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_0: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_0: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 
0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_0: @@ -2714,14 +5317,21 @@ define float @v_log_f32_0() { ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 
0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_0: @@ -2738,23 +5348,141 @@ define float @v_log_f32_0() { } define float @v_log_f32_from_fpext_f16(i16 %src.i) { -; GFX689-LABEL: v_log_f32_from_fpext_f16: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_from_fpext_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_from_fpext_f16: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: 
v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_from_fpext_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_from_fpext_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_from_fpext_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 
v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_from_fpext_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_from_fpext_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_from_fpext_f16: ; R600: ; %bb.0: @@ -2784,10 +5512,18 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f32_from_fpext_math_f16: @@ -2795,41 +5531,130 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_log_f32_from_fpext_math_f16: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_f16_e32 v0, v0, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_log_f32_e32 v0, v0 -; VI-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_log_f32_from_fpext_math_f16: +; VI-SDAG: ; %bb.0: +; 
VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_log_f32_from_fpext_math_f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX900-NEXT: v_log_f32_e32 v0, v0 -; GFX900-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_log_f32_from_fpext_math_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: 
v_log_f32_from_fpext_math_f16: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_log_f32_from_fpext_math_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_from_fpext_math_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1100-SDAG-LABEL: v_log_f32_from_fpext_math_f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_math_f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: 
v_log_f32_from_fpext_math_f16: ; R600: ; %bb.0: @@ -2849,52 +5674,166 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_log_f32_from_fpext_bf16(bfloat %src) { -; GFX689-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_from_fpext_bf16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; 
SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_f32_from_fpext_bf16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_from_fpext_bf16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: 
v_log_f32_from_fpext_bf16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_from_fpext_bf16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_from_fpext_bf16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_from_fpext_bf16: @@ -2919,6 +5858,8 @@ define half @v_log_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f16: @@ -2973,6 +5914,8 @@ define half @v_log_fabs_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fabs_f16: @@ -3028,6 +5971,8 @@ define half @v_log_fneg_fabs_f16(half 
%in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fneg_fabs_f16: @@ -3084,6 +6029,8 @@ define half @v_log_fneg_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fneg_f16: @@ -3139,6 +6086,8 @@ define half @v_log_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f16_fast: @@ -3197,6 +6146,10 @@ define <2 x half> @v_log_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v2f16: @@ -3282,6 +6235,10 @@ define <2 x half> @v_log_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fabs_v2f16: @@ -3397,13 +6354,17 @@ define <2 x half> @v_log_fneg_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 -; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fneg_fabs_v2f16: @@ -3520,13 +6481,17 @@ define <2 x half> @v_log_fneg_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_fneg_v2f16: @@ -3645,6 +6610,10 @@ define <2 x half> @v_log_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; 
SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v2f16_fast: @@ -3734,6 +6703,12 @@ define <3 x half> @v_log_v3f16(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v3f16: @@ -3823,6 +6798,12 @@ define <3 x half> @v_log_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v3f16_fast: @@ -3902,20 +6883,28 @@ define <4 x half> @v_log_v4f16(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; 
SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v4f16: @@ -4062,20 +7051,28 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, 
v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v4f16_fast: @@ -4232,4 +7229,6 @@ attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX689-GISEL: {{.*}} +; GFX689-SDAG: {{.*}} ; SI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 4ca17c757ff75d..432d407974f253 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -15,60 +15,88 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-LABEL: s_log10_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2 +; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log10_f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; 
VI-SDAG-LABEL: s_log10_f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 ; VI-SDAG-NEXT: s_endpgm ; @@ -77,57 +105,85 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-GISEL-NEXT: 
v_mov_b32_e32 v0, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log10_f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3284fbcf +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3 +; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log10_f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log10_f32: @@ -137,16 +193,23 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | 
instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v1, v0 :: v_dual_mov_b32 v1, 0 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s3 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm ; @@ -157,22 +220,29 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; 
GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, s3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD @@ -181,18 +251,30 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T1.W, KC0[2].Z, PV.W, -; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: ADD * T0.W, PS, -T0.W, -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45) +; R600-NEXT: AND_INT * T1.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T2.W, T0.X, -PV.W, +; R600-NEXT: MUL_IEEE * T3.W, PV.W, 
literal.x, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T0.X, PV.W, -PS, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log10_f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X ; CM-NEXT: CF_END ; CM-NEXT: PAD @@ -201,16 +283,27 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, ; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; CM-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x, -; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, PV.W, -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: ADD * T0.W, PV.X, -T0.Z, -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; CM-NEXT: LOG_IEEE T0.X, T1.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: AND_INT * T1.W, PV.X, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T2.W, T0.X, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.W, 
literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; CM-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W, +; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|, +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call float @llvm.log10.f32(float %in) @@ -225,162 +318,259 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s8, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v1 -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s3, 0x3e9a209a ; SI-SDAG-NEXT: s_mov_b32 s4, s0 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, 
v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log10_v2f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s4, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; 
SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; SI-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4 +; SI-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 +; SI-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 
+; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log10_v2f32: ; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log10_v2f32: ; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0x800000 +; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v4, v0 
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v4 +; VI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, v2, v1, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log10_v2f32: ; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3284fbcf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4 +; GFX900-SDAG-NEXT: 
v_fma_f32 v5, v3, s3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 +; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log10_v2f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; 
GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX900-GISEL-NEXT: 
s_endpgm ; ; GFX1100-SDAG-LABEL: s_log10_v2f32: @@ -390,19 +580,29 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s3, v2 :: v_dual_mul_f32 v3, s2, v3 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v3, v1 :: v_dual_mov_b32 v3, 0 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v0 :: v_dual_mul_f32 v0, 0x3e9a209b, v2 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3e9a209a, v0, -v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; 
GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5 ; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm @@ -416,25 +616,35 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; 
GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v4, v0, 0x3e9a209a, -v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_v2f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XY, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: @@ -446,25 +656,45 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Z, KC0[2].W, PS, -; R600-NEXT: CNDE T0.W, T0.W, 0.0, 
literal.x, +; R600-NEXT: MUL_IEEE T3.W, KC0[2].W, PS, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; R600-NEXT: CNDE T1.Z, T1.W, 0.0, literal.x, -; R600-NEXT: ADD T0.W, PS, -PV.W, -; R600-NEXT: LOG_IEEE * T0.X, PV.Z, -; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: ADD * T0.W, PS, -PV.Z, -; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45) +; R600-NEXT: AND_INT T2.W, PS, literal.x, +; R600-NEXT: LOG_IEEE * T0.Y, PV.W, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T3.W, T0.X, -PV.W, +; R600-NEXT: AND_INT * T4.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T5.W, T0.Y, -PS, +; R600-NEXT: MUL_IEEE * T6.W, PV.W, literal.x, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T6.W, T2.W, literal.x, PS, +; R600-NEXT: MUL_IEEE * T7.W, PV.W, literal.x, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T7.W, T4.W, literal.x, PS, +; R600-NEXT: MULADD_IEEE * T3.W, T3.W, literal.y, PV.W, BS:VEC_021/SCL_122 +; R600-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01) +; R600-NEXT: MULADD_IEEE T0.Z, T2.W, literal.x, PS, +; R600-NEXT: SETGT T2.W, literal.y, |T0.X|, +; R600-NEXT: MULADD_IEEE * T3.W, T5.W, literal.x, PV.W, BS:VEC_021/SCL_122 +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: MULADD_IEEE T1.Y, T4.W, literal.x, PS, +; R600-NEXT: SETGT T1.Z, literal.y, |T0.Y|, +; R600-NEXT: CNDE T2.W, PV.W, T0.X, PV.Z, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T2.Y, PV.W, -PS, +; R600-NEXT: CNDE T0.W, PV.Z, T0.Y, PV.Y, +; R600-NEXT: CNDE * T1.W, T1.W, 0.0, 
literal.x, +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log10_v2f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: ALU 47, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: @@ -473,28 +703,48 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x, ; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, PV.Z, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, -; CM-NEXT: ADD T0.Z, PV.X, -T1.Z, -; CM-NEXT: MUL_IEEE * T0.W, KC0[2].W, T0.Y, -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T0.Y, T0.Z, literal.x, -; CM-NEXT: ADD * T0.W, PV.X, -T1.Y, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: CNDE T1.Z, PV.W, 1.0, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].X, PV.Z, +; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; 
CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: AND_INT T0.Z, PV.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[2].W, T1.Z, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: ADD T1.Z, T0.X, -T0.Z, +; CM-NEXT: AND_INT * T2.W, PV.Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD T2.Z, T0.Y, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T3.Z, T0.Z, literal.x, PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T4.Z, T2.W, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE * T3.W, T1.Z, literal.y, PV.Z, +; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01) +; CM-NEXT: MULADD_IEEE T1.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: SETGT T0.Z, literal.y, |T0.X|, +; CM-NEXT: MULADD_IEEE * T3.W, T2.Z, literal.x, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: MULADD_IEEE T1.X, T2.W, literal.x, PV.W, +; CM-NEXT: SETGT T2.Y, literal.y, |T0.Y|, +; CM-NEXT: CNDE T0.Z, PV.Z, T0.X, PV.Y, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD T1.Y, PV.Z, -PV.W, +; CM-NEXT: CNDE T0.Z, PV.Y, T0.Y, PV.X, +; CM-NEXT: CNDE * T0.W, T1.W, 0.0, literal.x, +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD * T1.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <2 x float> @llvm.log10.v2f32(<2 x float> %in) store <2 x float> %result, ptr addrspace(1) %out @@ -504,215 +754,354 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) define amdgpu_kernel void 
@s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; SI-SDAG-LABEL: s_log10_v3f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v2, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; SI-SDAG-NEXT: s_mov_b32 s11, 0x3284fbcf +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s9, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, 
v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 -; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log10_v3f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-GISEL-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v4, s9, v4 ; SI-GISEL-NEXT: v_log_f32_e32 v4, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 
v3, 0x3e9a209a +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v4 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf +; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log10_v3f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: 
s_mov_b32 s8, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 +; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v5 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v0, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s6 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s7 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log10_v3f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; 
VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s5, v3 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 -; VI-GISEL-NEXT: v_log_f32_e32 v4, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; 
VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s8 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s9 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log10_v3f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 
0x4f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3284fbcf ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s6, -v4 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4 ; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v3 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; GFX900-SDAG-NEXT: 
v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-SDAG-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s6, -v6 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s7, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s6, -v4 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s7, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log10_v3f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; GFX900-GISEL-NEXT: 
v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1] ; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v4 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, 
s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf +; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log10_v3f32: @@ -725,26 +1114,47 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s4 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, s6, v3 :: v_dual_mul_f32 v4, s5, v4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 
0x42000000, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s6, v0 :: v_dual_mul_f32 v1, s5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s4, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v4, v1 :: v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3e9a209a, v0, -v3 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v3, v5, v2 :: v_dual_mul_f32 v2, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v1 :: v_dual_mul_f32 v0, 0x3e9a209b, v3 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2 +; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, 
v3, v6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_cndmask_b32 v1, v1, v4 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6 ; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm @@ -763,32 +1173,52 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s3 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s2 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_sub_f32 v1, v1, v4 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v1 :: v_dual_mul_f32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fma_f32 v6, v0, 0x3e9a209a, -v3 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2 +; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_mov_b32 v3, 0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 ; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_v3f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: ALU clause starting at 4: ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z, @@ -799,80 +1229,137 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Y, KC0[3].Y, PS, -; R600-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x, -; R600-NEXT: SETGT T0.W, literal.y, KC0[3].W, +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Y, PS, +; R600-NEXT: SETGT T3.W, literal.x, KC0[3].W, ; R600-NEXT: LOG_IEEE * T0.X, PV.W, -; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) -; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, -; R600-NEXT: CNDE T1.Z, PV.W, 1.0, literal.y, -; R600-NEXT: ADD T1.W, PS, -PV.Z, -; R600-NEXT: LOG_IEEE * T0.X, PV.Y, -; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: 
MUL_IEEE T1.W, KC0[3].W, PV.Z, -; R600-NEXT: ADD * T2.W, PS, -PV.Y, -; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PS, literal.x, -; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.y, -; R600-NEXT: LOG_IEEE * T0.Z, PV.W, -; R600-NEXT: 1050288283(3.010300e-01), 1107296256(3.200000e+01) -; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, -; R600-NEXT: ADD * T0.W, PS, -PV.W, -; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T2.X, PV.W, literal.x, -; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, -; R600-NEXT: 1050288283(3.010300e-01), 8(1.121039e-44) +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: AND_INT T1.Z, PS, literal.x, +; R600-NEXT: CNDE T2.W, PV.W, 1.0, literal.y, +; R600-NEXT: LOG_IEEE * T0.Y, PV.Z, +; R600-NEXT: -4096(nan), 1333788672(4.294967e+09) +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W, +; R600-NEXT: ADD T2.W, T0.X, -PV.Z, +; R600-NEXT: AND_INT * T4.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T2.Z, T0.Y, -PS, +; R600-NEXT: MUL_IEEE T5.W, PV.W, literal.x, +; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T3.Z, T1.Z, literal.x, PV.W, +; R600-NEXT: AND_INT T5.W, PS, literal.y, +; R600-NEXT: MUL_IEEE * T6.W, PV.Z, literal.x, +; R600-NEXT: 916096251(4.605039e-06), -4096(nan) +; R600-NEXT: MULADD_IEEE T4.Z, T4.W, literal.x, PS, +; R600-NEXT: ADD T6.W, T0.Z, -PV.W, +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.y, PV.Z, BS:VEC_021/SCL_122 +; R600-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01) +; R600-NEXT: MULADD_IEEE T1.Y, T1.Z, literal.x, PS, +; R600-NEXT: SETGT T1.Z, literal.y, |T0.X|, +; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.z, +; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Z, BS:VEC_021/SCL_122 +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.X, T4.W, literal.x, PS, +; R600-NEXT: 
SETGT T2.Y, literal.y, |T0.Y|, +; R600-NEXT: MULADD_IEEE T2.Z, T5.W, literal.z, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE T2.W, PV.Z, T0.X, PV.Y, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.w, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: 916096251(4.605039e-06), 1092231323(9.632960e+00) +; R600-NEXT: ADD T1.Y, PV.W, -PS, +; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.x, PV.Z, +; R600-NEXT: CNDE T0.W, PV.Y, T0.Y, PV.X, +; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.y, +; R600-NEXT: 1050288128(3.010254e-01), 1092231323(9.632960e+00) +; R600-NEXT: ADD T1.X, PV.W, -PS, +; R600-NEXT: MULADD_IEEE T0.W, T5.W, literal.x, PV.Z, +; R600-NEXT: SETGT * T1.W, literal.y, |T0.Z|, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, +; R600-NEXT: CNDE T0.W, PS, T0.Z, PV.W, +; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.y, +; R600-NEXT: 2(2.802597e-45), 1092231323(9.632960e+00) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; R600-NEXT: LSHR * T3.X, PV.W, literal.x, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log10_v3f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T1.X +; CM-NEXT: ALU 68, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4.X, T1.X ; CM-NEXT: CF_END ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W, +; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y, ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x, +; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z, -; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y, 
+; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Z, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x, -; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.y, -; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.x, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, PV.Y, -; CM-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y, T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: CNDE T1.X, T1.W, 1.0, literal.x, -; CM-NEXT: CNDE T2.Y, T0.Z, 0.0, literal.y, -; CM-NEXT: ADD T0.Z, PV.Y, -T1.Z, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Z, T1.Y, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y, T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T2.X, T0.Z, literal.x, -; CM-NEXT: ADD T0.Y, PV.Y, -T2.Y, -; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T1.X, -; CM-NEXT: 1050288283(3.010300e-01), 8(1.121039e-44) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W, T0.W, -; CM-NEXT: LSHR T1.X, T0.Z, literal.x, -; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.y, -; CM-NEXT: ADD * T0.W, PV.W, -T0.X, -; CM-NEXT: 2(2.802597e-45), 1050288283(3.010300e-01) -; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x, +; CM-NEXT: AND_INT T0.Z, PV.X, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].W, T0.Y, +; CM-NEXT: 1333788672(4.294967e+09), -4096(nan) +; 
CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: ADD T2.Y, T0.X, -T0.Z, +; CM-NEXT: AND_INT T2.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.Y, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T1.X, T2.W, +; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W, +; CM-NEXT: ADD T1.Y, T0.Y, -T2.Z, +; CM-NEXT: AND_INT T3.Z, PV.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, T2.Y, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: -4096(nan), 916096251(4.605039e-06) +; CM-NEXT: MULADD_IEEE T3.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: ADD T4.Z, T1.X, -PV.Z, +; CM-NEXT: MUL_IEEE * T2.W, PV.Y, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T4.Y, T2.Z, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T5.Z, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE * T2.W, T2.Y, literal.y, PV.Y, +; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01) +; CM-NEXT: MULADD_IEEE T2.Y, T0.Z, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T0.Z, T3.Z, literal.y, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.x, PV.Y, +; CM-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06) +; CM-NEXT: SETGT T2.X, literal.x, |T0.X|, +; CM-NEXT: MULADD_IEEE T1.Y, T2.Z, literal.y, PV.W, +; CM-NEXT: SETGT T2.Z, literal.x, |T0.Y|, +; CM-NEXT: MULADD_IEEE * T2.W, T4.Z, literal.y, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: 2139095040(INF), 1050288128(3.010254e-01) +; CM-NEXT: MULADD_IEEE T3.X, T3.Z, literal.x, PV.W, +; CM-NEXT: SETGT T3.Y, literal.y, |T1.X|, +; CM-NEXT: CNDE T0.Z, PV.Z, T0.Y, PV.Y, +; CM-NEXT: CNDE * T1.W, T1.W, 0.0, literal.z, +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD T4.X, PV.Z, -PV.W, +; CM-NEXT: CNDE T0.Y, PV.Y, T1.X, PV.X, +; CM-NEXT: CNDE T0.Z, T1.Z, 0.0, literal.x, +; 
CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; CM-NEXT: 1092231323(9.632960e+00), 8(1.121039e-44) +; CM-NEXT: LSHR T1.X, PV.W, literal.x, +; CM-NEXT: ADD T0.Y, PV.Y, -PV.Z, +; CM-NEXT: CNDE T0.Z, T2.X, T0.X, T2.Y, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.y, +; CM-NEXT: 2(2.802597e-45), 1092231323(9.632960e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <3 x float> @llvm.log10.v3f32(<3 x float> %in) store <3 x float> %result, ptr addrspace(1) %out @@ -884,255 +1371,433 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-SDAG-LABEL: s_log10_v4f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s12, 0x3284fbcf ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, 
vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s11, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3 +; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v4, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2 +; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 ; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v1 -; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; SI-SDAG-NEXT: 
v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log10_v4f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000 +; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 
0, v4, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1 +; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; SI-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 +; SI-GISEL-NEXT: v_log_f32_e32 v9, v9 +; 
SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf +; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10 +; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 +; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 +; SI-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 -; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log10_v4f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 
0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_mov_b32 s7, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v4, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v2 +; 
VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v6, v5 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc ; VI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v7, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s7 +; VI-SDAG-NEXT: 
v_cndmask_b32_e64 v1, v6, v4, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s8 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, s9 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log10_v4f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, 
v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x800000 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v7 +; VI-GISEL-NEXT: v_log_f32_e32 v7, v7 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], 
|v1|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x369a84fb, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v8 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 -; VI-GISEL-NEXT: 
v_mov_b32_e32 v4, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s8 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s9 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log10_v4f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x3284fbcf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x411a209b +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s10, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3 +; 
GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v2 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-SDAG-NEXT: global_store_dwordx4 v7, v[0:3], s[2:3] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s7, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s10, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v7 +; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s7, -v5 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s10, v8 +; 
GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s7, -v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s10, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 +; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log10_v4f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5 -; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 +; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a +; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s6, v9 +; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 
0x3284fbcf +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 ; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10 +; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log10_v4f32: @@ -1146,32 +1811,52 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v4, s7, v4 :: v_dual_mul_f32 v5, s6, v5 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s5, v6 :: v_dual_mul_f32 v7, s4, v7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s7, v0 :: v_dual_mul_f32 v1, s6, v1 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s5, v2 :: v_dual_mul_f32 v3, s4, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s9 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v4, v0 :: v_dual_sub_f32 v1, v5, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; 
GFX1100-SDAG-NEXT: v_dual_sub_f32 v4, v6, v2 :: v_dual_sub_f32 v5, v7, v3 -; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v1 :: v_dual_mul_f32 v1, 0x3e9a209b, v4 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v5 -; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3e9a209a, v0, -v5 +; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 +; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 +; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 
v2, v1, v9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15 +; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1] ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm ; @@ -1191,133 +1876,226 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_mul_f32 v3, s7, v3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s3 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 
0x3e9a209a, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v2 :: v_dual_mul_f32 v3, 0x3e9a209b, v3 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v10, v0, 0x3e9a209a, -v5 +; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 +; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 +; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: 
v_dual_sub_f32 v1, v1, v9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15 ; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_v4f32: ; R600: ; %bb.0: -; R600-NEXT: ALU 38, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 +; R600-NEXT: ALU 76, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X, -; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W, +; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z, +; R600-NEXT: SETGT * T1.W, literal.x, KC0[4].X, ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W, -; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z, +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Z, PV.W, +; R600-NEXT: SETGT T2.W, literal.x, KC0[3].W, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y, ; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS, -; R600-NEXT: SETGT T0.Y, literal.x, KC0[3].Y, -; R600-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, -; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z, -; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, -; R600-NEXT: 8388608(1.175494e-38), 1107296256(3.200000e+01) +; R600-NEXT: MUL_IEEE T1.Z, KC0[4].X, PS, +; R600-NEXT: CNDE T3.W, PV.W, 1.0, literal.x, +; R600-NEXT: LOG_IEEE * T0.X, PV.Z, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W, -; R600-NEXT: ADD T1.Y, 
PS, -PV.Z, -; R600-NEXT: CNDE T0.Z, T1.W, 0.0, literal.x, -; R600-NEXT: CNDE T0.W, PV.Y, 1.0, literal.y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) -; R600-NEXT: MUL_IEEE T2.X, KC0[3].Y, PV.W, -; R600-NEXT: CNDE T2.Y, T2.W, 0.0, literal.x, -; R600-NEXT: ADD T0.Z, PS, -PV.Z, -; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01) -; R600-NEXT: CNDE T1.Y, T0.Y, 0.0, literal.x, -; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.y, -; R600-NEXT: ADD T1.W, PS, -PV.Y, -; R600-NEXT: LOG_IEEE * T0.X, PV.X, -; R600-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01) -; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, -; R600-NEXT: ADD * T1.W, PS, -PV.Y, -; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45) +; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W, +; R600-NEXT: SETGT T3.W, literal.x, KC0[3].Y, +; R600-NEXT: LOG_IEEE * T0.Y, PV.Z, +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: AND_INT T1.Y, PS, literal.x, +; R600-NEXT: AND_INT T1.Z, T0.X, literal.x, +; R600-NEXT: CNDE T4.W, PV.W, 1.0, literal.y, +; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, +; R600-NEXT: -4096(nan), 1333788672(4.294967e+09) +; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.W, +; R600-NEXT: ADD T2.Z, T0.X, -PV.Z, +; R600-NEXT: AND_INT T4.W, PS, literal.x, +; R600-NEXT: ADD * T5.W, T0.Y, -PV.Y, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.Y, PS, literal.x, +; R600-NEXT: ADD T3.Z, T0.Z, -PV.W, +; R600-NEXT: MUL_IEEE T6.W, PV.Z, literal.x, +; R600-NEXT: LOG_IEEE * T1.X, PV.Y, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.x, PV.W, +; R600-NEXT: MUL_IEEE T4.Z, PV.Z, literal.x, +; R600-NEXT: AND_INT T6.W, PS, literal.y, +; R600-NEXT: 
MULADD_IEEE * T7.W, T1.Y, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 916096251(4.605039e-06), -4096(nan) +; R600-NEXT: MULADD_IEEE T3.Y, T5.W, literal.x, PS, +; R600-NEXT: ADD T5.Z, T1.X, -PV.W, +; R600-NEXT: MULADD_IEEE T5.W, T4.W, literal.y, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06) +; R600-NEXT: MULADD_IEEE T2.X, T1.Z, literal.x, PS, +; R600-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.x, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE T1.Z, PV.Z, literal.y, +; R600-NEXT: MULADD_IEEE T5.W, T1.Y, literal.x, PV.Y, +; R600-NEXT: SETGT * T7.W, literal.z, |T0.Y|, +; R600-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06) +; R600-NEXT: 2139095040(INF), 0(0.000000e+00) +; R600-NEXT: CNDE T3.X, PS, T0.Y, PV.W, +; R600-NEXT: CNDE T0.Y, T1.W, 0.0, literal.x, +; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.y, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MULADD_IEEE T1.W, T4.W, literal.z, PV.Y, BS:VEC_201 +; R600-NEXT: SETGT * T4.W, literal.w, |T0.Z|, +; R600-NEXT: 1092231323(9.632960e+00), 916096251(4.605039e-06) +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: SETGT T4.X, literal.x, |T0.X|, +; R600-NEXT: CNDE T1.Y, PS, T0.Z, PV.W, +; R600-NEXT: CNDE T0.Z, T2.W, 0.0, literal.y, +; R600-NEXT: MULADD_IEEE T1.W, T5.Z, literal.z, PV.Z, +; R600-NEXT: ADD * T2.W, PV.X, -PV.Y, +; R600-NEXT: 2139095040(INF), 1092231323(9.632960e+00) +; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T3.X, T6.W, literal.x, PV.W, +; R600-NEXT: SETGT T0.Y, literal.y, |T1.X|, +; R600-NEXT: ADD T2.Z, PV.Y, -PV.Z, +; R600-NEXT: CNDE T1.W, PV.X, T0.X, T2.X, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T2.Y, PV.W, -PS, +; R600-NEXT: CNDE T0.W, PV.Y, T1.X, PV.X, +; R600-NEXT: CNDE * 
T1.W, T3.W, 0.0, literal.x, +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, PV.W, -PS, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log10_v4f32: ; CM: ; %bb.0: -; CM-NEXT: ALU 50, @4, KC0[CB0:0-32], KC1[] -; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X +; CM-NEXT: ALU 84, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: -; CM-NEXT: SETGT T0.Z, literal.x, KC0[4].X, ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y, ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, -; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x, -; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W, +; CM-NEXT: SETGT * T1.W, literal.y, KC0[4].X, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) ; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x, -; CM-NEXT: SETGT T1.Y, literal.y, KC0[3].Z, -; CM-NEXT: CNDE T0.Z, T0.Z, 0.0, literal.z, -; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, PV.Z, +; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x, +; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Y, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) -; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) +; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Y, T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: CNDE T1.X, T1.Z, 1.0, literal.x, +; CM-NEXT: AND_INT T2.Y, PV.Y, literal.y, +; CM-NEXT: MUL_IEEE T2.Z, KC0[3].W, T1.Y, +; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, T0.X, +; CM-NEXT: 1333788672(4.294967e+09), -4096(nan) +; CM-NEXT: LOG_IEEE T0.X, T2.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W, +; CM-NEXT: LOG_IEEE T1.X (MASKED), T2.Z, +; CM-NEXT: LOG_IEEE T1.Y, T2.Z, +; 
CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.Z, +; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.Z, +; CM-NEXT: ADD T2.X, T0.Y, -T2.Y, +; CM-NEXT: AND_INT T3.Y, PV.Y, literal.x, +; CM-NEXT: AND_INT T2.Z, T0.X, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.X, +; CM-NEXT: -4096(nan), 0(0.000000e+00) ; CM-NEXT: LOG_IEEE T1.X, T2.W, ; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W, ; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W, ; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W, -; CM-NEXT: ADD T1.X, PV.X, -T0.Z, -; CM-NEXT: CNDE T2.Y, T1.Y, 1.0, literal.x, -; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.y, -; CM-NEXT: MUL_IEEE * T1.W, KC0[3].W, T0.X, -; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) -; CM-NEXT: LOG_IEEE T0.X, T1.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, -; CM-NEXT: ADD T0.X, PV.X, -T0.Z, -; CM-NEXT: CNDE T1.Y, T1.Y, 0.0, literal.x, -; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, T2.Y, -; CM-NEXT: MUL_IEEE * T1.W, T1.X, literal.y, -; CM-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01) -; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.Z, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.Z, -; CM-NEXT: LOG_IEEE T0.Z, T0.Z, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z, -; CM-NEXT: CNDE T1.X, T0.W, 0.0, literal.x, -; CM-NEXT: ADD T1.Y, PV.Z, -T1.Y, -; CM-NEXT: MUL_IEEE T1.Z, T0.X, literal.y, -; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T0.Y, BS:VEC_021/SCL_122 -; CM-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01) -; CM-NEXT: LOG_IEEE T0.X, T0.W, -; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, -; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, -; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, -; CM-NEXT: MUL_IEEE T1.Y, T1.Y, literal.x, -; CM-NEXT: ADD * T0.W, PV.X, -T1.X, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) -; CM-NEXT: MUL_IEEE * T1.X, PV.W, literal.x, -; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00) +; CM-NEXT: ADD T3.X, T0.X, -T2.Z, +; CM-NEXT: ADD T4.Y, T1.Y, -T3.Y, +; CM-NEXT: AND_INT T3.Z, PV.X, literal.x, +; 
CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: -4096(nan), 916096251(4.605039e-06) +; CM-NEXT: MULADD_IEEE T4.X, T2.Y, literal.x, PV.W, +; CM-NEXT: ADD T5.Y, T1.X, -PV.Z, +; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T2.W, PV.X, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T5.X, T2.Z, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T6.Y, T3.Y, literal.x, PV.Z, +; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE * T2.W, T2.X, literal.y, PV.X, +; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01) +; CM-NEXT: MULADD_IEEE T2.X, T2.Y, literal.x, PV.W, +; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.Z, +; CM-NEXT: MULADD_IEEE T4.Z, T4.Y, literal.x, PV.Y, BS:VEC_120/SCL_212 +; CM-NEXT: MULADD_IEEE * T2.W, T3.X, literal.x, PV.X, +; CM-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06) +; CM-NEXT: MULADD_IEEE T3.X, T2.Z, literal.x, PV.W, +; CM-NEXT: SETGT T4.Y, literal.y, |T0.X|, +; CM-NEXT: MULADD_IEEE T2.Z, T3.Y, literal.x, PV.Z, +; CM-NEXT: SETGT * T2.W, literal.y, |T1.Y|, +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: CNDE T4.X, PV.W, T1.Y, PV.Z, +; CM-NEXT: CNDE T1.Y, PV.Y, T0.X, PV.X, +; CM-NEXT: CNDE T2.Z, T1.W, 0.0, literal.x, +; CM-NEXT: MULADD_IEEE * T1.W, T5.Y, literal.y, T2.Y, +; CM-NEXT: 1092231323(9.632960e+00), 1050288128(3.010254e-01) +; CM-NEXT: CNDE T0.X, T0.Z, 0.0, literal.x, +; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.W, BS:VEC_120/SCL_212 +; CM-NEXT: SETGT T0.Z, literal.z, |T1.X|, +; CM-NEXT: ADD * T2.W, PV.Y, -PV.Z, +; CM-NEXT: 1092231323(9.632960e+00), 1050288128(3.010254e-01) +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: SETGT T3.X, literal.x, |T0.Y|, +; CM-NEXT: CNDE T1.Y, PV.Z, T1.X, PV.Y, +; CM-NEXT: ADD T2.Z, T4.X, -PV.X, +; CM-NEXT: CNDE * T1.W, T1.Z, 0.0, literal.y, +; CM-NEXT: 2139095040(INF), 1092231323(9.632960e+00) +; CM-NEXT: ADD T2.Y, PV.Y, -PV.W, +; CM-NEXT: CNDE T0.Z, PV.X, T0.Y, T2.X, +; 
CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD * T2.X, PV.Z, -PV.W, ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <4 x float> @llvm.log10.v4f32(<4 x float> %in) @@ -1326,49 +2104,170 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) } define float @v_log10_f32(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 
s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 
1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX900-SDAG-LABEL: v_log10_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32: @@ -1376,14 +2275,21 @@ define float @v_log10_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 
0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32: @@ -1400,49 +2306,170 @@ define float @v_log10_f32(float %in) { } define float @v_log10_fabs_f32(float %in) { -; GFX689-SDAG-LABEL: v_log10_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 
0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log10_fabs_f32: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-LABEL: v_log10_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 
0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: 
v_log10_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, 
s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_fabs_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, 
s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_fabs_f32: @@ -1452,12 +2479,19 @@ define float @v_log10_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fabs_f32: @@ -1475,49 +2509,170 @@ define float @v_log10_fabs_f32(float %in) { } define float @v_log10_fneg_fabs_f32(float %in) { -; GFX689-SDAG-LABEL: v_log10_fneg_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_fneg_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_fneg_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_fneg_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] 
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; 
GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_f32: @@ -1527,12 +2682,19 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fneg_fabs_f32: @@ -1551,49 +2713,170 @@ define float @v_log10_fneg_fabs_f32(float %in) { } define float @v_log10_fneg_f32(float %in) { -; GFX689-SDAG-LABEL: v_log10_fneg_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 -; 
GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_fneg_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_fneg_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_fneg_f32: +; SI-GISEL: ; %bb.0: +; 
SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: 
v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: 
v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_fneg_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_fneg_f32: @@ -1603,12 +2886,19 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; 
GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fneg_f32: @@ -1626,50 +2916,20 @@ define float @v_log10_fneg_f32(float %in) { } define float @v_log10_f32_fast(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32_fast: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_fast: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log10_f32_fast: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; 
GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_fast: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_fast: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log10_f32_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_fast: ; R600: ; %bb.0: @@ -1743,49 +3003,170 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" } define float @v_log10_f32_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: 
v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: 
v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_ninf: @@ -1793,14 +3174,21 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; 
GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_ninf: @@ -1817,50 +3205,20 @@ define float @v_log10_f32_ninf(float %in) { } define float @v_log10_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log10_f32_afn: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_afn: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log10_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn: ; R600: ; %bb.0: @@ -1905,50 +3263,20 @@ define float @v_log10_f32_afn_daz(float %in) #0 { } define float @v_log10_f32_afn_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log10_f32_afn_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_afn_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log10_f32_afn_dynamic: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_afn_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log10_f32_afn_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn_dynamic: ; R600: ; %bb.0: @@ -1964,50 +3292,20 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log10_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_fabs_log10_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: 
v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_fabs_log10_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_fabs_log10_f32_afn: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_fabs_log10_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_fabs_log10_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log10_f32_afn: ; R600: ; %bb.0: @@ -2024,20 +3322,125 @@ define float @v_fabs_log10_f32_afn(float %in) { } define float @v_log10_f32_daz(float %in) #0 { -; GFX689-LABEL: v_log10_f32_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; 
SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 
0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_daz: ; R600: ; %bb.0: @@ -2053,49 +3456,170 @@ define float @v_log10_f32_daz(float %in) #0 { } define float @v_log10_f32_nnan(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32_nnan: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_nnan: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_nnan: +; SI-SDAG: ; %bb.0: +; 
SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_nnan: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, 
s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_nnan: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_nnan: @@ -2103,14 +3627,21 @@ define float 
@v_log10_f32_nnan(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan: @@ -2127,20 +3658,125 @@ define float @v_log10_f32_nnan(float %in) { } define float @v_log10_f32_nnan_daz(float %in) #0 { -; GFX689-LABEL: v_log10_f32_nnan_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_nnan_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, 
v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_nnan_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_nnan_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_nnan_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
VI-GISEL-LABEL: v_log10_f32_nnan_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1100-SDAG-LABEL: v_log10_f32_nnan_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_nnan_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_daz: ; R600: ; %bb.0: @@ -2156,49 +3792,164 @@ define float @v_log10_f32_nnan_daz(float %in) #0 { } define float @v_log10_f32_nnan_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log10_f32_nnan_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 
s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_nnan_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_nnan_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; 
SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_nnan_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e64 
v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, 
s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_nnan_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; 
GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_nnan_dynamic: @@ -2206,14 +3957,21 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; 
GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_dynamic: @@ -2230,20 +3988,125 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { } define float @v_log10_f32_ninf_daz(float %in) #0 { -; GFX689-LABEL: v_log10_f32_ninf_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_ninf_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; 
SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 
0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_ninf_daz: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_ninf_daz: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_ninf_daz: ; R600: ; %bb.0: @@ -2259,169 +4122,466 @@ define float @v_log10_f32_ninf_daz(float %in) #0 { } define float @v_log10_f32_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log10_f32_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_ninf_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; 
GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 
|v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; 
VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; 
GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_ninf_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_ninf_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log10_f32_ninf_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log10_f32_ninf_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.log10.f32(float %in) + ret float %result +} + +define float @v_log10_f32_nnan_ninf(float %in) { +; SI-SDAG-LABEL: v_log10_f32_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 
0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log10_f32_ninf_dynamic: +; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_ninf_dynamic: +; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; R600-LABEL: v_log10_f32_ninf_dynamic: +; R600-LABEL: v_log10_f32_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; -; CM-LABEL: v_log10_f32_ninf_dynamic: +; CM-LABEL: v_log10_f32_nnan_ninf: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD - %result = call ninf float @llvm.log10.f32(float %in) + %result = call nnan ninf float @llvm.log10.f32(float %in) 
ret float %result } -define float @v_log10_f32_nnan_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log10_f32_nnan_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_nnan_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { +; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v1, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 +; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf: +; 
SI-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 +; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 +; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log10_f32_nnan_ninf_daz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f32_e32 v0, v0 +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v2 +; VI-NEXT: v_mac_f32_e32 v0, 0x369a84fb, v1 +; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v2 +; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v1 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: 
v_log10_f32_nnan_ninf_daz: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf: +; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; R600-LABEL: v_log10_f32_nnan_ninf: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_log10_f32_nnan_ninf: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call nnan ninf float @llvm.log10.f32(float %in) - ret float %result -} - -define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { -; GFX689-LABEL: v_log10_f32_nnan_ninf_daz: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: v_log10_f32_nnan_ninf_daz: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] -; ; R600-LABEL: v_log10_f32_nnan_ninf_daz: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2436,49 +4596,143 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { } define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: -; 
GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 
v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b +; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b +; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, 
vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic: ; 
GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -2486,14 +4740,18 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 
0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_ninf_dynamic: @@ -2539,49 +4797,164 @@ define float @v_log10_f32_fast_daz(float %in) #0 { } define float @v_log10_f32_dynamic_mode(float %in) #1 { -; GFX689-SDAG-LABEL: v_log10_f32_dynamic_mode: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_dynamic_mode: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; 
GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_dynamic_mode: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_dynamic_mode: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; 
SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2 +; 
VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; 
GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_dynamic_mode: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_dynamic_mode: @@ -2589,14 +4962,21 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_dynamic_mode: @@ -2613,35 +4993,142 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { } define float @v_log10_f32_undef() { -; GFX689-SDAG-LABEL: v_log10_f32_undef: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_undef: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_undef: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_undef: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf 
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_undef: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_undef: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; VI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; 
VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_undef: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_undef: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_undef: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_undef: @@ -2650,14 +5137,21 @@ define float @v_log10_f32_undef() { ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_undef: @@ -2665,47 +5159,156 @@ define float @v_log10_f32_undef() { ; R600-NEXT: CF_END ; R600-NEXT: PAD ; -; CM-LABEL: v_log10_f32_undef: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call float @llvm.log10.f32(float undef) - ret float %result -} - -define float @v_log10_f32_0() { -; GFX689-SDAG-LABEL: v_log10_f32_0: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_0: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; CM-LABEL: v_log10_f32_undef: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.log10.f32(float undef) + ret float %result +} + +define float @v_log10_f32_0() { +; SI-SDAG-LABEL: v_log10_f32_0: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_log_f32_e32 v0, 0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_0: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_0: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f32_e32 v0, 0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_0: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; VI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_0: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_0: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 +; 
GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_0: @@ -2714,14 +5317,21 @@ define float @v_log10_f32_0() { ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, 
s0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_0: @@ -2738,23 +5348,141 @@ define float @v_log10_f32_0() { } define float @v_log10_f32_from_fpext_f16(i16 %src.i) { -; GFX689-LABEL: v_log10_f32_from_fpext_f16: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_from_fpext_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_from_fpext_f16: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_from_fpext_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; 
SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_from_fpext_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_from_fpext_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; 
VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 
0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_f16: ; R600: ; %bb.0: @@ -2784,10 +5512,18 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: 
v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f32_from_fpext_math_f16: @@ -2795,41 +5531,130 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_log10_f32_from_fpext_math_f16: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_f16_e32 v0, v0, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_log_f32_e32 v0, v0 -; VI-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; VI-SDAG-LABEL: v_log10_f32_from_fpext_math_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 
0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_log10_f32_from_fpext_math_f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX900-NEXT: v_log_f32_e32 v0, v0 -; GFX900-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; VI-GISEL-LABEL: v_log10_f32_from_fpext_math_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_from_fpext_math_f16: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 
0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_math_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_math_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_math_f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: 
s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_math_f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_math_f16: ; R600: ; %bb.0: @@ -2849,52 +5674,166 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_log10_f32_from_fpext_bf16(bfloat %src) { -; GFX689-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_from_fpext_bf16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_f32_from_fpext_bf16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s4, 
0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_from_fpext_bf16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_from_fpext_bf16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, 
v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_bf16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_bf16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 +; GFX900-GISEL-NEXT: 
v_fma_f32 v1, v0, v1, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 
v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_bf16: @@ -2919,6 +5858,8 @@ define half @v_log10_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f16: @@ -2973,6 +5914,8 @@ define half @v_log10_fabs_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fabs_f16: @@ -3028,6 +5971,8 @@ define half @v_log10_fneg_fabs_f16(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fneg_fabs_f16: @@ -3084,6 +6029,8 @@ define half @v_log10_fneg_f16(half %in) 
{ ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fneg_f16: @@ -3139,6 +6086,8 @@ define half @v_log10_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f16_fast: @@ -3197,6 +6146,10 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v2f16: @@ -3282,6 +6235,10 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fabs_v2f16: @@ -3397,13 +6354,17 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 -; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: 
v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fneg_fabs_v2f16: @@ -3520,13 +6481,17 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) { ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_fneg_v2f16: @@ -3645,6 +6610,10 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v2f16_fast: @@ -3734,6 +6703,12 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 
0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v3f16: @@ -3823,6 +6798,12 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v3f16_fast: @@ -3902,20 +6883,28 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: 
v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v4f16: @@ -4062,20 +7051,28 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v4f16_fast: @@ -4232,4 +7229,6 @@ attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } attributes #2 = { nocallback nofree nosync nounwind speculatable 
willreturn memory(none) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX689-GISEL: {{.*}} +; GFX689-SDAG: {{.*}} ; SI: {{.*}}