Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17770,7 +17770,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
if (N1C->isNegative() || Flags.hasNoSignedZeros())
return N0;

if (SDValue NewSel = foldBinOpIntoSelect(N))
Expand Down Expand Up @@ -17823,11 +17823,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getConstantFP(0.0, DL, VT);
}

// If 'unsafe math' or reassoc and nsz, fold lots of things.
// If reassoc and nsz, fold lots of things.
// TODO: break out the portions of the transformations below that do not
// require both nsz and reassoc
if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
Expand Down Expand Up @@ -17911,10 +17910,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math && AllowNewConst
} // reassoc && nsz && AllowNewConst

if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
VT, N0, N1, Flags))
Expand Down
333 changes: 97 additions & 236 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -581,145 +581,63 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
ret { half, half } %insert.1
}

; This one asserted with -enable-no-signed-zeros-fp-math
define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
; SI-SAFE-LABEL: fneg_fadd_0_f16:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, s0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
; SI-SAFE-NEXT: v_rcp_f32_e32 v3, v2
; SI-SAFE-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; SI-SAFE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; SI-SAFE-NEXT: v_fma_f32 v3, v5, v3, v3
; SI-SAFE-NEXT: v_mul_f32_e32 v5, v4, v3
; SI-SAFE-NEXT: v_fma_f32 v6, -v2, v5, v4
; SI-SAFE-NEXT: v_fma_f32 v5, v6, v3, v5
; SI-SAFE-NEXT: v_fma_f32 v2, -v2, v5, v4
; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; SI-SAFE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; SI-SAFE-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-SAFE-NEXT: ; return to shader part epilog
;
; SI-NSZ-LABEL: fneg_fadd_0_f16:
; SI-NSZ: ; %bb.0: ; %.entry
; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NSZ-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
; SI-NSZ-NEXT: v_rcp_f32_e32 v3, v2
; SI-NSZ-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; SI-NSZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; SI-NSZ-NEXT: v_fma_f32 v3, v5, v3, v3
; SI-NSZ-NEXT: v_mul_f32_e32 v5, v4, v3
; SI-NSZ-NEXT: v_fma_f32 v6, -v2, v5, v4
; SI-NSZ-NEXT: v_fma_f32 v5, v6, v3, v5
; SI-NSZ-NEXT: v_fma_f32 v2, -v2, v5, v4
; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; SI-NSZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; SI-NSZ-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-NSZ-NEXT: ; return to shader part epilog
;
; VI-SAFE-LABEL: fneg_fadd_0_f16:
; VI-SAFE: ; %bb.0: ; %.entry
; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-SAFE-NEXT: ; return to shader part epilog
;
; VI-NSZ-LABEL: fneg_fadd_0_f16:
; VI-NSZ: ; %bb.0: ; %.entry
; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NSZ-NEXT: ; return to shader part epilog
;
; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
; GFX11-SAFE: ; %bb.0: ; %.entry
; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-SAFE-NEXT: ; return to shader part epilog
;
; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
; GFX11-NSZ: ; %bb.0: ; %.entry
; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-NSZ-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_f16:
; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
define amdgpu_ps half @fneg_fadd_0_safe_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
; SI-LABEL: fneg_fadd_0_safe_f16:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
; SI-NEXT: v_rcp_f32_e32 v3, v2
; SI-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; SI-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; SI-NEXT: v_fma_f32 v3, v5, v3, v3
; SI-NEXT: v_mul_f32_e32 v5, v4, v3
; SI-NEXT: v_fma_f32 v6, -v2, v5, v4
; SI-NEXT: v_fma_f32 v5, v6, v3, v5
; SI-NEXT: v_fma_f32 v2, -v2, v5, v4
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; SI-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; SI-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
; SI-NEXT: v_mad_f32 v0, v0, 0, 0
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-NEXT: ; return to shader part epilog
;
; VI-LABEL: fneg_fadd_0_safe_f16:
; VI: ; %bb.0: ; %.entry
; VI-NEXT: v_rcp_f16_e32 v0, s1
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: v_mul_f16_e32 v0, 0, v0
; VI-NEXT: v_add_f16_e32 v0, 0, v0
; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v0
; VI-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: fneg_fadd_0_safe_f16:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v0, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_f16_e32 v0, 0, v0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv half 1.000000e+00, %tmp6
%tmp8 = fmul half 0.000000e+00, %tmp7
Expand All @@ -733,108 +651,51 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
ret half %.i198
}

; This is a workaround because -enable-no-signed-zeros-fp-math does not
; automatically set the unsafe-fp-math function attribute. Combine this with the
; previous test once that is done.
define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 {
; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
; SI-SAFE-NEXT: s_brev_b32 s0, 1
; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-SAFE-NEXT: ; return to shader part epilog
;
; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
; SI-NSZ: ; %bb.0: ; %.entry
; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
; SI-NSZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; SI-NSZ-NEXT: ; return to shader part epilog
;
; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
; VI-SAFE: ; %bb.0: ; %.entry
; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-SAFE-NEXT: ; return to shader part epilog
;
; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
; VI-NSZ: ; %bb.0: ; %.entry
; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NSZ-NEXT: ; return to shader part epilog
;
; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-SAFE: ; %bb.0: ; %.entry
; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-SAFE-NEXT: ; return to shader part epilog
;
; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-NSZ: ; %bb.0: ; %.entry
; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-NSZ-NEXT: ; return to shader part epilog
; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
; SI-LABEL: fneg_fadd_0_nsz_f16:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
; SI-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_rcp_f32_e32 v0, v0
; SI-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; SI-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; SI-NEXT: ; return to shader part epilog
;
; VI-LABEL: fneg_fadd_0_nsz_f16:
; VI: ; %bb.0: ; %.entry
; VI-NEXT: v_rcp_f16_e32 v0, s1
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; VI-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: fneg_fadd_0_nsz_f16:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn half 1.000000e+00, %tmp6
%tmp8 = fmul contract half 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
%.i188 = fadd nnan ninf contract half %tmp9, 0.000000e+00
%.i188 = fadd nsz half %tmp9, 0.000000e+00
%tmp10 = fcmp uge half %.i188, %tmp2
%tmp11 = fneg half %.i188
%.i092 = select i1 %tmp10, half %tmp2, half %tmp11
Expand Down
Loading