diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2ed66b1d57c4d..ef8da1e35e924 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2760,14 +2760,40 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags); } -SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, +SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const { - // exp2(M_LOG2E_F * f); - EVT VT = Op.getValueType(); - const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Op, K, Flags); - return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul, + EVT VT = X.getValueType(); + const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT); + + if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) { + // exp2(M_LOG2E_F * f); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags); + return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, + Mul, Flags); + } + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT); + SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT); + + SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT); + + SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags); + + SDValue AdjustedX = + DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X); + + SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags); + + SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags); + + SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT); + SDValue AdjustedResult = + DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags); + + return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2, Flags); } @@ -2800,7 +2826,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying // library behavior. Also, is known-not-daz source sufficient? - if (allowApproxFunc(DAG, Flags) && !needsDenormHandlingF32(DAG, X, Flags)) { + if (allowApproxFunc(DAG, Flags)) { assert(!IsExp10 && "todo exp10 support"); return lowerFEXPUnsafe(X, SL, DAG, Flags); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 33be59a69b52e..d7e2d15f98a38 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3304,20 +3304,42 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI, } bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, - Register Src, - unsigned Flags) const { + Register X, unsigned Flags) const { LLT Ty = B.getMRI()->getType(Dst); - auto K = B.buildFConstant(Ty, numbers::log2e); - auto Mul = B.buildFMul(Ty, Src, K, Flags); + LLT F32 = LLT::scalar(32); - if (Ty == LLT::scalar(32)) { - B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef{Dst}) + if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) { + auto Log2E = B.buildFConstant(Ty, numbers::log2e); + auto Mul = B.buildFMul(Ty, X, Log2E, Flags); + + if (Ty == F32) { + B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef{Dst}) .addUse(Mul.getReg(0)) .setMIFlags(Flags); - } else { - B.buildFExp2(Dst, Mul.getReg(0), Flags); + } else { + B.buildFExp2(Dst, Mul.getReg(0), Flags); + } + + return true; } + auto Threshold = B.buildFConstant(Ty, -0x1.5d58a0p+6f); + auto NeedsScaling = + B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, Threshold, Flags); + auto ScaleOffset = B.buildFConstant(Ty, 0x1.0p+6f); + auto ScaledX = B.buildFAdd(Ty, X, ScaleOffset, Flags); + auto AdjustedX = B.buildSelect(Ty, NeedsScaling, ScaledX, X, Flags); + + auto Log2E = B.buildFConstant(Ty, numbers::log2e); + auto ExpInput = B.buildFMul(Ty, AdjustedX, Log2E, Flags); + + auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}) + .addUse(ExpInput.getReg(0)) + .setMIFlags(Flags); + + auto ResultScaleFactor = B.buildFConstant(Ty, 0x1.969d48p-93f); + auto AdjustedResult = B.buildFMul(Ty, Exp2, ResultScaleFactor, Flags); + B.buildSelect(Dst, NeedsScaling, AdjustedResult, Exp2, Flags); return true; } @@ -3358,7 +3380,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying // library behavior. Also, is known-not-daz source sufficient? - if (allowApproxFunc(MF, Flags) && !needsDenormHandlingF32(MF, X, Flags)) { + if (allowApproxFunc(MF, Flags)) { legalizeFExpUnsafe(B, Dst, X, Flags); MI.eraseFromParent(); return true; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll index 36d5326b0a3eb..16ee31a875b39 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -3212,124 +3212,56 @@ define float @v_exp_fneg_f32(float %in) { } define float @v_exp_f32_fast(float %in) { -; VI-SDAG-LABEL: v_exp_f32_fast: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_fast: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_fast: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; GCN-SDAG-LABEL: v_exp_f32_fast: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-GISEL-LABEL: v_exp_f32_fast: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN-GISEL-LABEL: v_exp_f32_fast: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_fast: @@ -3346,148 +3278,56 @@ define float @v_exp_f32_fast(float %in) { } define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; VI-SDAG-LABEL: v_exp_f32_unsafe_math_attr: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_unsafe_math_attr: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_unsafe_math_attr: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; GCN-SDAG-LABEL: v_exp_f32_unsafe_math_attr: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-GISEL-LABEL: v_exp_f32_unsafe_math_attr: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN-GISEL-LABEL: v_exp_f32_unsafe_math_attr: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_unsafe_math_attr: @@ -3504,148 +3344,56 @@ define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; VI-SDAG-LABEL: v_exp_f32_approx_fn_attr: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_approx_fn_attr: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_approx_fn_attr: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; GCN-SDAG-LABEL: v_exp_f32_approx_fn_attr: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-GISEL-LABEL: v_exp_f32_approx_fn_attr: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN-GISEL-LABEL: v_exp_f32_approx_fn_attr: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_approx_fn_attr: @@ -3657,495 +3405,16 @@ define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD - %result = call float @llvm.exp.f32(float %in) - ret float %result -} - -define float @v_exp_f32_ninf(float %in) { -; VI-SDAG-LABEL: v_exp_f32_ninf: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_ninf: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_ninf: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_f32_ninf: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; SI-SDAG-LABEL: v_exp_f32_ninf: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; SI-GISEL-LABEL: v_exp_f32_ninf: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; R600-LABEL: v_exp_f32_ninf: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_exp_f32_ninf: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call ninf float @llvm.exp.f32(float %in) - ret float %result -} - -define float @v_exp_f32_afn(float %in) { -; VI-SDAG-LABEL: v_exp_f32_afn: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_afn: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_afn: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_f32_afn: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; SI-SDAG-LABEL: v_exp_f32_afn: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; SI-GISEL-LABEL: v_exp_f32_afn: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; R600-LABEL: v_exp_f32_afn: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_exp_f32_afn: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call afn float @llvm.exp.f32(float %in) - ret float %result -} - -define float @v_exp_f32_afn_daz(float %in) #0 { -; GCN-LABEL: v_exp_f32_afn_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; SI-LABEL: v_exp_f32_afn_daz: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; R600-LABEL: v_exp_f32_afn_daz: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_exp_f32_afn_daz: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call afn float @llvm.exp.f32(float %in) - ret float %result -} - -define float @v_exp_f32_afn_dynamic(float %in) #1 { -; VI-SDAG-LABEL: v_exp_f32_afn_dynamic: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_afn_dynamic: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_f32_afn_dynamic: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; SI-SDAG-LABEL: v_exp_f32_afn_dynamic: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; SI-GISEL-LABEL: v_exp_f32_afn_dynamic: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; R600-LABEL: v_exp_f32_afn_dynamic: -; R600: ; %bb.0: -; R600-NEXT: CF_END -; R600-NEXT: PAD -; -; CM-LABEL: v_exp_f32_afn_dynamic: -; CM: ; %bb.0: -; CM-NEXT: CF_END -; CM-NEXT: PAD - %result = call afn float @llvm.exp.f32(float %in) + %result = call float @llvm.exp.f32(float %in) ret float %result } -define float @v_fabs_exp_f32_afn(float %in) { -; VI-SDAG-LABEL: v_fabs_exp_f32_afn: +define float @v_exp_f32_ninf(float %in) { +; VI-SDAG-LABEL: v_exp_f32_ninf: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 -; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 @@ -4158,21 +3427,16 @@ define float @v_fabs_exp_f32_afn(float %in) { ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; VI-GISEL-LABEL: v_fabs_exp_f32_afn: +; VI-GISEL-LABEL: v_exp_f32_ninf: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 -; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 @@ -4184,106 +3448,316 @@ define float @v_fabs_exp_f32_afn(float %in) { ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-SDAG-LABEL: v_fabs_exp_f32_afn: +; GFX900-SDAG-LABEL: v_exp_f32_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-GISEL-LABEL: v_fabs_exp_f32_afn: +; GFX900-GISEL-LABEL: v_exp_f32_ninf: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 -; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; SI-SDAG-LABEL: v_fabs_exp_f32_afn: +; SI-SDAG-LABEL: v_exp_f32_ninf: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; SI-GISEL-LABEL: v_fabs_exp_f32_afn: +; SI-GISEL-LABEL: v_exp_f32_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4 -; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f -; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp_f32_ninf: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp_f32_ninf: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.exp.f32(float %in) + ret float %result +} + +define float @v_exp_f32_afn(float %in) { +; GCN-SDAG-LABEL: v_exp_f32_afn: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp_f32_afn: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp_f32_afn: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp_f32_afn: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp.f32(float %in) + ret float %result +} + +define float @v_exp_f32_afn_daz(float %in) #0 { +; GCN-LABEL: v_exp_f32_afn_daz: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; SI-LABEL: v_exp_f32_afn_daz: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-NEXT: v_exp_f32_e32 v0, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp_f32_afn_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp_f32_afn_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp.f32(float %in) + ret float %result +} + +define float @v_exp_f32_afn_dynamic(float %in) #1 { +; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp_f32_afn_dynamic: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp_f32_afn_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp_f32_afn_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp.f32(float %in) + ret float %result +} + +define float @v_fabs_exp_f32_afn(float %in) { +; GCN-SDAG-LABEL: v_fabs_exp_f32_afn: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42800000 +; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 +; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_fabs_exp_f32_afn: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_fabs_exp_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x42800000 +; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_fabs_exp_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_exp_f32_afn: @@ -6465,21 +5939,15 @@ define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast: