diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 0a7ae20e9b1c8..7c749e225c599 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -980,8 +980,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics. half). Not implemented for double. Hardware provides 1ULP accuracy for float, and 0.51ULP for half. Float instruction does not natively support denormal - inputs. Backend will optimize out denormal scaling if - marked with the :ref:`afn <fastmath_afn>` flag. + inputs. :ref:`llvm.sqrt <int_sqrt>` Implemented for double, float and half (and vectors). diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 254d02d4ce5ba..fc82fb620142f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2528,7 +2528,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src, std::pair AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Src, SDNodeFlags Flags) const { - if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) + if (!needsDenormHandlingF32(DAG, Src, Flags)) return {}; MVT VT = MVT::f32; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 120c00b14a369..9325b14e7cc5f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3037,8 +3037,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src, std::pair AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const { - if (allowApproxFunc(B.getMF(), Flags) || - !needsDenormHandlingF32(B.getMF(), Src, Flags)) + if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) return {}; const LLT F32 = LLT::scalar(32); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index f5e30d654b055..6485f13d0c89e 
100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -1511,17 +1511,59 @@ define float @v_log2_fneg_f32(float %in) { } define float @v_log2_f32_fast(float %in) { -; GFX689-LABEL: v_log2_f32_fast: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_fast: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_fast: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_fast: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_fast: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_fast: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_fast: ; R600: ; %bb.0: @@ -1537,17 +1579,59 @@ define float @v_log2_f32_fast(float %in) { } define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GFX689-LABEL: v_log2_f32_unsafe_math_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_unsafe_math_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; 
GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -1563,17 +1647,59 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GFX689-LABEL: v_log2_f32_approx_fn_attr: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_approx_fn_attr: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; 
GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -1657,17 +1783,59 @@ define float @v_log2_f32_ninf(float %in) { } define float @v_log2_f32_afn(float %in) { -; GFX689-LABEL: v_log2_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_afn: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_afn: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_afn: +; GFX1100-GISEL: ; %bb.0: 
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn: ; R600: ; %bb.0: @@ -1709,17 +1877,59 @@ define float @v_log2_f32_afn_daz(float %in) #0 { } define float @v_log2_f32_afn_dynamic(float %in) #1 { -; GFX689-LABEL: v_log2_f32_afn_dynamic: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e32 v0, v0 -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_afn_dynamic: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn_dynamic: ; R600: ; %bb.0: @@ -1735,17 +1945,61 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log2_f32_afn(float %in) { -; GFX689-LABEL: v_fabs_log2_f32_afn: -; GFX689: ; %bb.0: -; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX689-NEXT: v_log_f32_e64 v0, |v0| -; GFX689-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_fabs_log2_f32_afn: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_log_f32_e64 v0, |v0| -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 +; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log2_f32_afn: ; R600: ; %bb.0: