diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index fc82fb620142f..69ea1501d147d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2609,9 +2609,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags); } - SDValue Lowered = LowerFLOGUnsafe( - X, DL, DAG, IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2, - Flags); + SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags); if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered, DAG.getTargetConstant(0, DL, MVT::i32), Flags); @@ -2696,11 +2694,36 @@ SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { // Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a // promote f16 operation. SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL, - SelectionDAG &DAG, - double Log2BaseInverted, + SelectionDAG &DAG, bool IsLog10, SDNodeFlags Flags) const { EVT VT = Src.getValueType(); unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2; + + double Log2BaseInverted = + IsLog10 ? 
numbers::ln2 / numbers::ln10 : numbers::ln2; + + if (VT == MVT::f32) { + auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags); + if (ScaledInput) { + SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags); + SDValue ScaledResultOffset = + DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT); + + SDValue Zero = DAG.getConstantFP(0.0f, SL, VT); + + SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled, + ScaledResultOffset, Zero, Flags); + + SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT); + + if (Subtarget->hasFastFMAF32()) + return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset, + Flags); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags); + return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset); + } + } + SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags); SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 26b91155ba85d..c39093b9bb6bb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -72,7 +72,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, - double Log2BaseInverted, SDNodeFlags Flags) const; + bool IsLog10, SDNodeFlags Flags) const; SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9325b14e7cc5f..100180a2ab442 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3131,16 +3131,13 @@ bool 
AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) || TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) { - const double Log2BaseInv = - IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2; - if (Ty == F16 && !ST.has16BitInsts()) { Register LogVal = MRI.createGenericVirtualRegister(F32); auto PromoteSrc = B.buildFPExt(F32, X); - legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags); + legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), IsLog10, Flags); B.buildFPTrunc(Dst, LogVal); } else { - legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags); + legalizeFlogUnsafe(B, Dst, X, IsLog10, Flags); } MI.eraseFromParent(); @@ -3224,10 +3221,36 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, } bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, - Register Src, - double Log2BaseInverted, + Register Src, bool IsLog10, unsigned Flags) const { + const double Log2BaseInverted = + IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2; + LLT Ty = B.getMRI()->getType(Dst); + + if (Ty == LLT::scalar(32)) { + auto [ScaledInput, IsScaled] = getScaledLogInput(B, Src, Flags); + if (ScaledInput) { + auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) + .addUse(ScaledInput) + .setMIFlags(Flags); + auto ScaledResultOffset = B.buildFConstant(Ty, -32.0 * Log2BaseInverted); + auto Zero = B.buildFConstant(Ty, 0.0); + auto ResultOffset = + B.buildSelect(Ty, IsScaled, ScaledResultOffset, Zero, Flags); + auto Log2Inv = B.buildFConstant(Ty, Log2BaseInverted); + + if (ST.hasFastFMAF32()) + B.buildFMA(Dst, LogSrc, Log2Inv, ResultOffset, Flags); + else { + auto Mul = B.buildFMul(Ty, LogSrc, Log2Inv, Flags); + B.buildFAdd(Dst, Mul, ResultOffset, Flags); + } + + return true; + } + } + auto Log2Operand = Ty == LLT::scalar(16) ? 
B.buildFLog2(Ty, Src, Flags) : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 04773f275c875..534bb2c87ea38 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -85,7 +85,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src, - double Log2BaseInverted, unsigned Flags) const; + bool IsLog10, unsigned Flags) const; bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src, unsigned Flags) const; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 41d5f7f2303e8..23aab3fa863d9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -2924,20 +2924,111 @@ define float @v_log_fneg_f32(float %in) { } define float @v_log_f32_fast(float %in) { -; GFX689-LABEL: v_log_f32_fast: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_fast: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_fast: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_fast: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: 
v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_fast: ; R600: ; %bb.0: @@ -2953,20 +3044,111 @@ define float @v_log_f32_fast(float %in) { } define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GFX689-LABEL: v_log_f32_unsafe_math_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_unsafe_math_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; 
SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_unsafe_math_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_unsafe_math_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_unsafe_math_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_unsafe_math_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_unsafe_math_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -2982,20 +3164,111 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GFX689-LABEL: v_log_f32_approx_fn_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: 
v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_approx_fn_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_approx_fn_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 
v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_approx_fn_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_approx_fn_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_approx_fn_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_approx_fn_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; 
GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -3213,20 +3486,111 @@ define float @v_log_f32_ninf(float %in) { } define float @v_log_f32_afn(float %in) { -; GFX689-LABEL: v_log_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: 
v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn: ; R600: ; %bb.0: @@ -3271,20 +3635,111 @@ define float @v_log_f32_afn_daz(float %in) #0 { } define float @v_log_f32_afn_dynamic(float %in) #1 { -; GFX689-LABEL: v_log_f32_afn_dynamic: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log_f32_afn_dynamic: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_f32_afn_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_f32_afn_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_f32_afn_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_f32_afn_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn_dynamic: ; R600: ; %bb.0: @@ -3300,20 +3755,112 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log_f32_afn(float %in) { -; GFX689-LABEL: v_fabs_log_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_fabs_log_f32_afn: +; 
SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_fabs_log_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_fabs_log_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_fabs_log_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_fabs_log_f32_afn: +; VI-GISEL: ; 
%bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_fabs_log_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218 +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_fabs_log_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218 +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e64 v1, |v0| +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, s0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log_f32_afn: ; R600: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 1aebcc61aa268..9df2cec444133 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -2924,20 +2924,111 @@ define float @v_log10_fneg_f32(float %in) { } define float @v_log10_f32_fast(float %in) { -; GFX689-LABEL: v_log10_f32_fast: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_fast: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_fast: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_fast: +; GFX1100-GISEL: 
; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_fast: ; R600: ; %bb.0: @@ -2953,20 +3044,111 @@ define float @v_log10_f32_fast(float %in) { } define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GFX689-LABEL: v_log10_f32_unsafe_math_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_unsafe_math_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 
0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_unsafe_math_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_unsafe_math_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_unsafe_math_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_unsafe_math_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_unsafe_math_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_unsafe_math_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -2982,20 +3164,111 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_log10_f32_approx_fn_attr(float 
%in) "approx-func-fp-math"="true" { -; GFX689-LABEL: v_log10_f32_approx_fn_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_approx_fn_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_approx_fn_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: 
v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_approx_fn_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_approx_fn_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_approx_fn_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1100-SDAG-LABEL: v_log10_f32_approx_fn_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_approx_fn_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -3213,20 +3486,111 @@ define float @v_log10_f32_ninf(float %in) { } define float @v_log10_f32_afn(float %in) { -; GFX689-LABEL: v_log10_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, 
v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: 
v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1100-GISEL-LABEL: v_log10_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn: ; R600: ; %bb.0: @@ -3271,20 +3635,111 @@ define float @v_log10_f32_afn_daz(float %in) #0 { } define float @v_log10_f32_afn_dynamic(float %in) #1 { -; GFX689-LABEL: v_log10_f32_afn_dynamic: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log10_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log10_f32_afn_dynamic: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log10_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_f32_afn_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_f32_afn_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_f32_afn_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_f32_afn_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_afn_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn_dynamic: ; R600: ; %bb.0: @@ -3300,20 +3755,112 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log10_f32_afn(float %in) { -; GFX689-LABEL: v_fabs_log10_f32_afn: -; 
GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_fabs_log10_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_fabs_log10_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_fabs_log10_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_fabs_log10_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 
0xc11a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_fabs_log10_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_fabs_log10_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b +; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_fabs_log10_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f32_e64 v2, |v0| +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b +; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_fabs_log10_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e64 v1, |v0| +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, s0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log10_f32_afn: ; R600: ; %bb.0: