diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5bbd42295a1ce..b5c746b81b91e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4725,7 +4725,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, // FIXME: Doesn't correctly model the FP mode switch, and the FP operations // aren't modeled as reading it. - if (Mode.FP32Denormals != DenormalMode::getIEEE()) + if (Mode.FP32Denormals == DenormalMode::getPreserveSign()) toggleSPDenormMode(true, B, ST, Mode); auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); @@ -4737,7 +4737,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, // FIXME: This mishandles dynamic denormal mode. We need to query the // current mode and restore the original. - if (Mode.FP32Denormals != DenormalMode::getIEEE()) + if (Mode.FP32Denormals == DenormalMode::getPreserveSign()) toggleSPDenormMode(false, B, ST, Mode); auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 777fe76df1151..998904bf08820 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9589,7 +9589,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { const SIMachineFunctionInfo *Info = MF.getInfo(); const DenormalMode DenormMode = Info->getMode().FP32Denormals; - const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE(); + const bool HasFP32Denormals = DenormMode != DenormalMode::getPreserveSign(); if (!HasFP32Denormals) { // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll index 25c7f5c8c531d..55b8dd6892097 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -201,6 +201,144 @@ define float @v_fdiv_f32(float %a, float %b) { ret float %fdiv } + +define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 { +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX10-NEXT: v_rcp_f32_e32 v3, v2 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX10-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX10-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_denorm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v3, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX11-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX11-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fdiv = fdiv float %a, %b + ret float %fdiv +} + define float @v_fdiv_f32_afn(float %a, float %b) { ; GCN-LABEL: v_fdiv_f32_afn: ; GCN: ; %bb.0: @@ -316,6 +454,123 @@ define float @v_fdiv_f32_ulp25(float %a, float %b) { ret float %fdiv } +define float @v_fdiv_f32_dynamic_25ulp(float %x, float %y) #0 { +; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX7-NEXT: v_rcp_f32_e32 v2, v2 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX8-NEXT: v_rcp_f32_e32 v2, v2 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_25ulp: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, %y, !fpmath !0 + ret float %div +} + define float @v_rcp_f32(float %x) { ; GFX6-IEEE-FASTFMA-LABEL: v_rcp_f32: ; GFX6-IEEE-FASTFMA: ; %bb.0: @@ -2220,7 +2475,2280 @@ define <2 x float> @v_fdiv_v2f32_arcp_afn_ulp25(<2 x float> %a, <2 x float> %b) ret <2 x float> %fdiv } -!0 = !{float 2.500000e+00} + +define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX8-NEXT: v_rcp_f32_e32 v4, v2 +; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX10-NEXT: v_rcp_f32_e32 v3, v2 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX10-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX10-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v3, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX11-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX11-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv nnan ninf float %x, %y + ret float %div +} + +define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z) #0 { +; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX7-NEXT: v_rcp_f32_e32 v2, v2 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX8-NEXT: v_rcp_f32_e32 v2, v2 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv nnan ninf float %x, %y, !fpmath !0 + ret float %div +} + +define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 { +; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc +; GFX6-NEXT: v_rcp_f32_e32 v3, v3 +; GFX6-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v3, v4, v3 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0 +; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX7-NEXT: v_rcp_f32_e32 v3, v3 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v3 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX8-NEXT: v_rcp_f32_e32 v3, v3 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v3 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v4, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v3 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v3, v4, v3 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v3, v0 +; GFX6-IEEE-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v3 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v3, v4, v3 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v3, v0 +; GFX6-FLUSH-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v3, v3 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_ldexp_f32 v0, v3, v0 +; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v3, v3 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v3, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v3, v0 +; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv nnan ninf contract float %x, %y, !fpmath !0 + %add = fadd contract float %div, %z + ret float %add +} + + +define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 +; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 +; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 +; GFX8-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 +; GFX8-NEXT: v_rcp_f32_e32 v4, v2 +; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, vcc, v2, v1, v2 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v2 +; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, v2, v1, v2 +; GFX10-NEXT: v_rcp_f32_e32 v4, v3 +; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX10-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX10-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v4 +; GFX10-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v2 +; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, v2, v1, v2 +; GFX11-NEXT: v_rcp_f32_e32 v4, v3 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX11-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_neglhs_f32_dynamic: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %neg.x = fneg float %x + %div = fdiv float %neg.x, %y + ret float %div +} + +define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) #0 { +; GFX6-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-NEXT: v_frexp_mant_f32_e64 v3, -v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v3, -v0, v3, s[4:5] +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX7-NEXT: v_rcp_f32_e32 v2, v2 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX7-NEXT: v_frexp_mant_f32_e64 v0, -v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX8-NEXT: v_rcp_f32_e32 v2, v2 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX8-NEXT: v_frexp_mant_f32_e64 v0, -v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e64 v3, -v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e64 v3, -v0, v3, s[4:5] +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e64 v0, -v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e64 v3, -v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e64 v3, -v0, v3, s[4:5] +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e64 v0, -v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e64 v3, -v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e64 v0, -v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e64 v3, -v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e64 v0, -v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %neg.x = fneg float %x + %div = fdiv float %neg.x, %y, !fpmath !0 + ret float %div +} + + +define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 +; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 +; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX7-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 +; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 +; GFX8-NEXT: v_rcp_f32_e32 v4, v2 +; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX8-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, vcc, v0, v2, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX10-NEXT: v_div_scale_f32 v3, s4, v2, v2, v0 +; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, v0, v2, v0 +; GFX10-NEXT: v_rcp_f32_e32 v4, v3 +; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX10-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX10-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v4 +; GFX10-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_div_scale_f32 v3, null, v2, v2, v0 +; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, v0, v2, v0 +; GFX11-NEXT: v_rcp_f32_e32 v4, v3 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX11-NEXT: v_mul_f32_e32 v5, v2, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v6, -v3, v5, v2 +; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v2, -v3, v5, v2 +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_negrhs_f32_dynamic: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %neg.y = fneg float %y + %div = fdiv float %x, %neg.y + ret float %div +} + + +define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 +; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX7-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX8-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX8-NEXT: v_rcp_f32_e32 v3, v1 +; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX8-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX89-IEEE-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v3, v1 +; GFX89-IEEE-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v3, v1 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v1, s6, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, 0x4640e400, v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v1 +; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_mul_f32_e32 v3, v4, v2 +; GFX10-NEXT: v_fma_f32 v5, -v1, v3, v4 +; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v2 +; GFX10-NEXT: v_fma_f32 v1, -v1, v3, v4 +; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v3 +; GFX10-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, 0x4640e400, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v3, v4, v2 +; GFX11-NEXT: v_fma_f32 v5, -v1, v3, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v2 +; GFX11-NEXT: v_fma_f32 v1, -v1, v3, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v3 +; GFX11-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_constrhs0_dynamic: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, 12345.0 + ret float %div +} + +define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 { +; GFX6-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, 0x4640e400 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x3fa9e0f0, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, -14, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, -14, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, 0x3f40e400 +; GFX6-IEEE-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, 0x4640e400 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, 0x3f40e400 +; GFX6-FLUSH-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, 0x4640e400 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400 +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0 +; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400 +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, 12345.0, !fpmath !0 + ret float %div +} + + +define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 +; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX8-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX8-NEXT: v_rcp_f32_e32 v3, v1 +; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX89-IEEE-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v3, v1 +; GFX89-IEEE-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: s_mov_b32 s6, 0x4640e400 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v3, v1 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v1, v0, s6 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0x4640e400 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, 0x4640e400, v0, 0x4640e400 +; GFX10-NEXT: v_rcp_f32_e32 v2, v1 +; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_mul_f32_e32 v3, v4, v2 +; GFX10-NEXT: v_fma_f32 v5, -v1, v3, v4 +; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v2 +; GFX10-NEXT: v_fma_f32 v1, -v1, v3, v4 +; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v3 +; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0x4640e400 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, 0x4640e400, v0, 0x4640e400 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v3, v4, v2 +; GFX11-NEXT: v_fma_f32 v5, -v1, v3, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v2 +; GFX11-NEXT: v_fma_f32 v1, -v1, v3, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v3 +; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_constlhs0_dynamic: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float 12345.0, %x + ret float %div +} + +define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) #0 { +; GFX6-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; GFX6-NEXT: v_rcp_f32_e32 v1, v1 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, 0x4640e400 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX7-NEXT: v_rcp_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, 14, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX8-NEXT: v_rcp_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 14, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v2, 0x4640e400 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v1, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v2, 0x4640e400 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v1, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v1, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, 14, v0 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v1, v1 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, 14, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float 12345.0, %x, !fpmath !0 + ret float %div +} + + +define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX8-NEXT: v_rcp_f32_e32 v4, v2 +; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX10-NEXT: v_rcp_f32_e32 v3, v2 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX10-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX10-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v3, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX11-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX11-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_x: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, %y + ret float %div +} + +define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #0 { +; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX7-NEXT: v_rcp_f32_e32 v2, v2 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX8-NEXT: v_rcp_f32_e32 v2, v2 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, %y, !fpmath !0 + ret float %div +} + +define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #0 { +; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-FASTFMA: ; %bb.0: +; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-SLOWFMA: ; %bb.0: +; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX8-NEXT: v_rcp_f32_e32 v4, v2 +; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-IEEE-FASTFMA: ; %bb.0: +; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-IEEE-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-IEEE-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-FLUSH-FASTFMA: ; %bb.0: +; GFX6-FLUSH-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 +; GFX6-FLUSH-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-FASTFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-IEEE-SLOWFMA: ; %bb.0: +; GFX6-IEEE-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-IEEE-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-IEEE-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX6-FLUSH-SLOWFMA: ; %bb.0: +; GFX6-FLUSH-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX6-FLUSH-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX6-FLUSH-SLOWFMA-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX89-IEEE: ; %bb.0: +; GFX89-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-IEEE-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-IEEE-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-IEEE-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-IEEE-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-IEEE-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX89-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX89-FLUSH: ; %bb.0: +; GFX89-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 +; GFX89-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 +; GFX89-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, -v2, v4, 1.0 +; GFX89-FLUSH-NEXT: v_fma_f32 v4, v5, v4, v4 +; GFX89-FLUSH-NEXT: v_mul_f32_e32 v5, v3, v4 +; GFX89-FLUSH-NEXT: v_fma_f32 v6, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_fma_f32 v5, v6, v4, v5 +; GFX89-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v3 +; GFX89-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; GFX89-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX89-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX10-NEXT: v_rcp_f32_e32 v3, v2 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX10-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX10-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v3, v2 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v4, v5, v3 +; GFX11-NEXT: v_fma_f32 v6, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v3 +; GFX11-NEXT: v_fma_f32 v2, -v2, v4, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v4 +; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_y: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, %y + ret float %div +} + +define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #0 { +; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX7-NEXT: v_rcp_f32_e32 v2, v2 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX8-NEXT: v_rcp_f32_e32 v2, v2 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-IEEE-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX6-IEEE: ; %bb.0: +; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-FLUSH-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX6-FLUSH: ; %bb.0: +; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-FLUSH-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-FLUSH-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-FLUSH-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX6-FLUSH-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX6-FLUSH-NEXT: v_ldexp_f32_e32 v0, v2, v0 +; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX10-NEXT: v_rcp_f32_e32 v2, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v2 +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %div = fdiv float %x, %y, !fpmath !0 + ret float %div +} + +!0 = !{float 2.500000e+00} + +attributes #0 = { "denormal-fp-math-f32"="dynamic,dynamic" } + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN-IEEE: {{.*}} -; GFX6-FLUSH: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll index 617ca447ec87e..415f9087a372d 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll @@ -2416,15 +2416,13 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -2435,14 +2433,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -2452,15 +2448,13 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -2471,14 +2465,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2487,16 +2479,14 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2505,17 +2495,15 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2720,15 +2708,13 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -2739,14 +2725,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -2756,15 +2740,13 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -2775,14 +2757,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2791,16 +2771,14 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2809,17 +2787,15 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -3281,15 +3257,13 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3301,14 +3275,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3319,15 +3291,13 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v4, v3 +; GFX7-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3339,14 +3309,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 ; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 ; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 ; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 ; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3356,16 +3324,14 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v4, v3 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 +; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 ; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3375,17 +3341,15 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v4, v3 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 +; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 ; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 @@ -3849,15 +3813,13 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -3868,14 +3830,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -3885,15 +3845,13 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -3904,14 +3862,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3920,16 +3876,14 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3938,17 +3892,15 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -4409,15 +4361,13 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4429,14 +4379,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4447,15 +4395,13 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v4, v3 +; GFX7-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 ; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4467,14 +4413,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 ; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 ; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 ; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 ; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4484,16 +4428,14 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v4, v3 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 +; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 ; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4503,17 +4445,15 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v4, v3 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 +; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 +; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 ; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 ; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 ; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 @@ -4979,15 +4919,13 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -4998,14 +4936,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -5015,15 +4951,13 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -5034,14 +4968,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -5050,16 +4982,14 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, -v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -5068,17 +4998,15 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, -v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -5532,15 +5460,13 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -5551,14 +5477,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -5568,15 +5492,13 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -5587,14 +5509,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -5603,16 +5523,14 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, -v1, -v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -5621,17 +5539,15 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, -v1, -v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -6070,15 +5986,13 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -6090,14 +6004,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -6108,15 +6020,13 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, s6, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -6128,14 +6038,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, s6, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -6144,16 +6052,14 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0 -; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v1 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 +; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 ; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 ; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX10-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -6162,17 +6068,15 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0 -; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v1 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 ; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 ; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX11-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -6570,15 +6474,13 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -6590,14 +6492,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -6608,15 +6508,13 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, s6 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -6628,14 +6526,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, s6 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -6644,16 +6540,14 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0x4640e400 -; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 ; GFX10-NEXT: v_rcp_f32_e32 v2, v1 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 +; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 ; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 ; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -6662,17 +6556,15 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0x4640e400 -; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 ; GFX11-NEXT: v_rcp_f32_e32 v2, v1 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 ; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 ; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 ; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -7113,15 +7005,13 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -7132,14 +7022,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -7149,15 +7037,13 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -7168,14 +7054,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -7184,16 +7068,14 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -7202,17 +7084,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) # ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -7660,15 +7540,13 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] @@ -7679,14 +7557,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] @@ -7696,15 +7572,13 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX7-NEXT: v_rcp_f32_e32 v3, v2 +; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 ; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 ; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 ; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -7715,14 +7589,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 ; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 ; GFX8-NEXT: v_rcp_f32_e32 v4, v2 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 ; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 ; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 ; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 ; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 ; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 ; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -7731,16 +7603,14 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 -; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_rcp_f32_e32 v3, v2 -; GFX10-NEXT: s_denorm_mode 15 -; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX10-NEXT: s_denorm_mode 12 ; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -7749,17 +7619,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) # ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 -; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_rcp_f32_e32 v3, v2 -; GFX11-NEXT: s_denorm_mode 15 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 +; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 +; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 ; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 ; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX11-NEXT: s_denorm_mode 12 ; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31]