-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[DAGCombiner] Remove NoSignedZerosFPMath uses in visitFADD
#160635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: None (paperchalice) Changes: Remove these global flags and use node-level flags instead. Patch is 50.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160635.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..c81568672de3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17770,7 +17770,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -17823,11 +17823,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getConstantFP(0.0, DL, VT);
}
- // If 'unsafe math' or reassoc and nsz, fold lots of things.
+ // If reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if ((Options.NoSignedZerosFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
@@ -17911,10 +17910,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getConstantFP(4.0, DL, VT));
}
}
- } // enable-unsafe-fp-math && AllowNewConst
+ } // reassoc && nsz && AllowNewConst
- if ((Options.NoSignedZerosFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
+ if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
VT, N0, N1, Flags))
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 462d7748b86cd..a95902a2f0e0c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -583,115 +583,62 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
; This one asserted with -enable-no-signed-zeros-fp-math
define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
-; SI-SAFE-LABEL: fneg_fadd_0_f16:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
-; SI-SAFE-NEXT: v_rcp_f32_e32 v3, v2
-; SI-SAFE-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-SAFE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
-; SI-SAFE-NEXT: v_fma_f32 v3, v5, v3, v3
-; SI-SAFE-NEXT: v_mul_f32_e32 v5, v4, v3
-; SI-SAFE-NEXT: v_fma_f32 v6, -v2, v5, v4
-; SI-SAFE-NEXT: v_fma_f32 v5, v6, v3, v5
-; SI-SAFE-NEXT: v_fma_f32 v2, -v2, v5, v4
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-SAFE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; SI-SAFE-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
-; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_f16:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NSZ-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
-; SI-NSZ-NEXT: v_rcp_f32_e32 v3, v2
-; SI-NSZ-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-NSZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
-; SI-NSZ-NEXT: v_fma_f32 v3, v5, v3, v3
-; SI-NSZ-NEXT: v_mul_f32_e32 v5, v4, v3
-; SI-NSZ-NEXT: v_fma_f32 v6, -v2, v5, v4
-; SI-NSZ-NEXT: v_fma_f32 v5, v6, v3, v5
-; SI-NSZ-NEXT: v_fma_f32 v2, -v2, v5, v4
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-NSZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; SI-NSZ-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_f16:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
-; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0_f16:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
-; GFX11-SAFE: ; %bb.0: ; %.entry
-; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
-; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
-; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
-; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
-; GFX11-NSZ: ; %bb.0: ; %.entry
-; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-NEXT: ; return to shader part epilog
+; SI-LABEL: fneg_fadd_0_f16:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
+; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
+; SI-NEXT: v_rcp_f32_e32 v3, v2
+; SI-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; SI-NEXT: v_fma_f32 v5, -v2, v3, 1.0
+; SI-NEXT: v_fma_f32 v3, v5, v3, v3
+; SI-NEXT: v_mul_f32_e32 v5, v4, v3
+; SI-NEXT: v_fma_f32 v6, -v2, v5, v4
+; SI-NEXT: v_fma_f32 v5, v6, v3, v5
+; SI-NEXT: v_fma_f32 v2, -v2, v5, v4
+; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; SI-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; SI-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
+; SI-NEXT: v_mad_f32 v0, v0, 0, 0
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
+; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: fneg_fadd_0_f16:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_rcp_f16_e32 v0, s1
+; VI-NEXT: v_mov_b32_e32 v1, s0
+; VI-NEXT: v_mul_f16_e32 v0, 0, v0
+; VI-NEXT: v_add_f16_e32 v0, 0, v0
+; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v0
+; VI-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
+; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
+; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: fneg_fadd_0_f16:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_f16_e32 v0, 0, v0
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
+; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
@@ -737,78 +684,45 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 {
-; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
-; SI-SAFE-NEXT: s_brev_b32 s0, 1
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
-; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
-; SI-NSZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
-; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
-; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
-; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
-; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-NSZ-NEXT: ; return to shader part epilog
-;
-; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-SAFE: ; %bb.0: ; %.entry
-; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
-; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-SAFE-NEXT: ; return to shader part epilog
-;
-; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
-; GFX11-NSZ: ; %bb.0: ; %.entry
-; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
-; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
-; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
-; GFX11-NSZ-NEXT: ; return to shader part epilog
+; SI-LABEL: fneg_fadd_0_nsz_f16:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
+; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
+; SI-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT: v_rcp_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
+; SI-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: fneg_fadd_0_nsz_f16:
+; VI: ; %bb.0: ; %.entry
+; VI-NEXT: v_rcp_f16_e32 v0, s1
+; VI-NEXT: v_mov_b32_e32 v1, s0
+; VI-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; VI-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
+; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: fneg_fadd_0_nsz_f16:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: v_rcp_f16_e32 v0, s1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0x8000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
+; GFX11-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
@@ -834,7 +748,7 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
%tmp7 = fdiv afn half 1.000000e+00, %tmp6
%tmp8 = fmul contract half 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
- %.i188 = fadd nnan ninf contract half %tmp9, 0.000000e+00
+ %.i188 = fadd nsz nnan ninf contract half %tmp9, 0.000000e+00
%tmp10 = fcmp uge half %.i188, %tmp2
%tmp11 = fneg half %.i188
%.i092 = select i1 %tmp10, half %tmp2, half %tmp11
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index ba34e9245f39c..9fe9600b38dc9 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -881,101 +881,53 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out
; This one asserted with -enable-no-signed-zeros-fp-math
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
-; SI-SAFE-LABEL: fneg_fadd_0:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-SAFE-NEXT: v_rcp_f32_e32 v1, v0
-; SI-SAFE-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-SAFE-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-SAFE-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-SAFE-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-SAFE-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
-;
-; SI-NSZ-LABEL: fneg_fadd_0:
-; SI-NSZ: ; %bb.0: ; %.entry
-; SI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; SI-NSZ-NEXT: v_rcp_f32_e32 v1, v0
-; SI-NSZ-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; SI-NSZ-NEXT: v_fma_f32 v3, -v0, v1, 1.0
-; SI-NSZ-NEXT: v_fma_f32 v1, v3, v1, v1
-; SI-NSZ-NEXT: v_mul_f32_e32 v3, v2, v1
-; SI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v2
-; SI-NSZ-NEXT: v_fma_f32 v3, v4, v1, v3
-; SI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v2
-; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; SI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v1, v3
-; SI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, s0
-; SI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-SAFE-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-SAFE-NEXT: v_rcp_f32_e32 v2, v0
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-SAFE-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-SAFE-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-SAFE-NEXT: v_mul_f32_e32 v3, v1, v2
-; VI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v1
-; VI-SAFE-NEXT: v_fma_f32 v3, v4, v2, v3
-; VI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v1
-; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; VI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v2, v3
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, s0
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
-; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0:
-; VI-NSZ: ; %bb.0: ; %.entry
-; VI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-NSZ-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
-; VI-NSZ-NEXT: v_rcp_f32_e32 v2, v0
-; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; VI-NSZ-NEXT: v_fma_f32 v3, -v0, v2, 1.0
-; VI-NSZ-NEXT: v_fma_f32 v2, v3, v2, v2
-; VI-NSZ-NEX...
[truncated]
|
@@ -583,115 +583,62 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c | |||
|
|||
; This one asserted with -enable-no-signed-zeros-fp-math | |||
define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are lots of codegen changes in these tests; these should get the flag added to the test content.
@@ -737,78 +684,45 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x | |||
; function attribute unsafe-fp-math automatically. Combine with the previous test | |||
; when that is done. | |||
define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check the comment above, should this copy be deleted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These tests should be combined, but we still need to distinguish between the nsz and safe versions. That's also why there are lots of changes: the NSZ and SAFE check prefixes are combined.
d9d2f83
to
2319f4f
Compare
@@ -879,149 +879,23 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out | |||
ret void | |||
} | |||
|
|||
; This one asserted with -enable-no-signed-zeros-fp-math | |||
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did this lose the safe / no-nsz coverage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does lose the coverage. In fact, I don't know how to combine them; there are 3 cases: safe, nsz only, and fast.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Duplicate the function with different flags added?
2319f4f
to
61afe44
Compare
Found that the test results for nsz only and fast are the same. Lines with label with |
…160635) Remove these global flags and use node level flags instead.
Remove these global flags and use node level flags instead.