Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -2322,6 +2322,10 @@ class SelectionDAG {
/// +nan are considered positive, -0.0, -inf and -nan are not.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const;

/// Check if all uses of a floating-point value are insensitive to signed
/// zeros, i.e. every user yields the same result whether a zero in \p Op is
/// +0.0 or -0.0. The check is conservative: users it does not recognize are
/// treated as sensitive. \p Op must have a floating-point type.
LLVM_ABI bool allUsesSignedZeroInsensitive(SDValue Op) const;

/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
Expand Down
20 changes: 14 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18891,12 +18891,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
assert(IsSigned || IsUnsigned);

bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath;
bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
// For signed conversions: The optimization changes signed zero behavior.
if (IsSigned && !IsSignedZeroSafe)
return SDValue();
// For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
// (unless NoSignedZerosFPMath is set).
// (unless outputting a signed zero is OK).
if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
return SDValue();

Expand Down Expand Up @@ -19375,10 +19376,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
N0.getOperand(0));
if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) {
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);

// Safe if NoSignedZeros, or if we can prove X != Y (avoiding the -0.0 vs
// +0.0 issue) For now, we use a conservative check: if either operand is
// known never zero, then X - Y can't produce a signed zero from X == Y.
if (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(X) ||
DAG.isKnownNeverZeroFloat(Y)) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X);
}
}

if (SimplifyDemandedBits(SDValue(N, 0)))
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6075,6 +6075,35 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
}

/// Return true if every use of the floating-point value \p Op produces the
/// same result whether a zero in \p Op is +0.0 or -0.0.
///
/// The check is local and conservative: each user is inspected on its own and
/// any opcode not listed below is treated as sign-sensitive. A value with no
/// uses trivially satisfies the predicate.
bool SelectionDAG::allUsesSignedZeroInsensitive(SDValue Op) const {
  assert(Op.getValueType().isFloatingPoint() &&
         "Signed-zero query on a non floating-point value");
  return all_of(Op->uses(), [&](SDUse &Use) {
    // SDNode::uses() walks the uses of every result of the node. Only uses of
    // the queried result can observe the sign of its zero, so uses of any
    // other result must not make the query fail.
    if (Use.getResNo() != Op.getResNo())
      return true;

    SDNode *User = Use.getUser();
    unsigned OperandNo = Use.getOperandNo();

    // Check if this use is insensitive to the sign of zero.
    switch (User->getOpcode()) {
    case ISD::SETCC:
      // Comparisons: IEEE-754 specifies +0.0 == -0.0.
    case ISD::FABS:
      // fabs always produces +0.0.
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // fp-to-int conversions map both +0.0 and -0.0 to integer zero.
      return true;
    case ISD::FCOPYSIGN:
      // copysign overwrites the sign bit of the first operand; the second
      // operand is read only for its sign, so it is sensitive.
      return OperandNo == 0;
    case ISD::FADD:
    case ISD::FSUB: {
      // Arithmetic with a known non-zero operand fixes the uncertainty around
      // the sign bit: (+/-0.0) +/- C and C - (+/-0.0) do not depend on the
      // sign of the zero when C != 0.
      SDValue Other = User->getOperand(1 - OperandNo);
      return isKnownNeverZeroFloat(Other);
    }
    default:
      return false;
    }
  });
}

bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
Expand Down
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,111 @@ entry:
ret float %f
}

; The sitofp result is only used by an fcmp against 0.0, a use that cannot
; distinguish +0.0 from -0.0. The checks expect the fptosi/sitofp round trip
; to become a single frintz under both check prefixes.
define i1 @test_fcmp(float %x) {
; CHECK-LABEL: test_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_fcmp:
; NO-SIGNED-ZEROS: // %bb.0:
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: fcmp s0, #0.0
; NO-SIGNED-ZEROS-NEXT: cset w0, eq
; NO-SIGNED-ZEROS-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%cmp = fcmp oeq float %conv2, 0.0
ret i1 %cmp
}

; The sitofp result is only used by llvm.fabs, which discards the sign of its
; input. The checks expect the fptosi/sitofp round trip to become a single
; frintz under both check prefixes.
define float @test_fabs(float %x) {
; CHECK-LABEL: test_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_fabs:
; NO-SIGNED-ZEROS: // %bb.0:
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: fabs s0, s0
; NO-SIGNED-ZEROS-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%abs = call float @llvm.fabs.f32(float %conv2)
ret float %abs
}

; The sitofp result is only used as the first (magnitude) operand of
; llvm.copysign, whose sign bit is taken from %y instead. The checks expect
; the fptosi/sitofp round trip to become a single frintz under both check
; prefixes.
define float @test_copysign(float %x, float %y) {
; CHECK-LABEL: test_copysign:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: mvni v2.4s, #128, lsl #24
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_copysign:
; NO-SIGNED-ZEROS: // %bb.0:
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: mvni v2.4s, #128, lsl #24
; NO-SIGNED-ZEROS-NEXT: // kill: def $s1 killed $s1 def $q1
; NO-SIGNED-ZEROS-NEXT: bif v0.16b, v1.16b, v2.16b
; NO-SIGNED-ZEROS-NEXT: // kill: def $s0 killed $s0 killed $q0
; NO-SIGNED-ZEROS-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%combine = call float @llvm.copysign.f32(float %conv2, float %y)
ret float %combine
}

; The sitofp result is only used by an fadd with the non-zero constant 1.0, so
; the sign of a zero input does not affect the sum. The checks expect the
; fptosi/sitofp round trip to become a single frintz under both check
; prefixes.
define float @test_fadd(float %x) {
; CHECK-LABEL: test_fadd:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_fadd:
; NO-SIGNED-ZEROS: // %bb.0:
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: fmov s1, #1.00000000
; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%add = fadd float %conv2, 1.0
ret float %add
}

; The sitofp result is only used by an fsub of the non-zero constant 1.0, so
; the sign of a zero input does not affect the difference. The checks expect
; the fptosi/sitofp round trip to become a single frintz, with the subtraction
; emitted as an fadd of -1.0, under both check prefixes.
define float @test_fsub(float %x) {
; CHECK-LABEL: test_fsub:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fmov s1, #-1.00000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_fsub:
; NO-SIGNED-ZEROS: // %bb.0:
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: fmov s1, #-1.00000000
; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%sub = fsub float %conv2, 1.0
ret float %sub
}

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)
64 changes: 21 additions & 43 deletions llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2615,65 +2615,43 @@ define half @select_fneg_posk_src_add_f16(i32 %c, half %x, half %y) {
}

define half @select_fneg_posk_src_sub_f16(i32 %c, half %x) {
; CI-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; CI-SAFE: ; %bb.0:
; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-SAFE-NEXT: v_add_f32_e32 v1, -4.0, v1
; CI-SAFE-NEXT: v_cndmask_b32_e64 v0, 2.0, -v1, vcc
; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
; CI-LABEL: select_fneg_posk_src_sub_f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-NEXT: v_sub_f32_e32 v1, 4.0, v1
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_add_f16_e32 v1, -4.0, v1
; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
; VI-LABEL: select_fneg_posk_src_sub_f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_f16_e32 v1, 4.0, v1
; VI-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v1.l, -4.0, v1.l
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
; GFX11-SAFE-TRUE16-NEXT: v_sub_f16_e32 v0.l, 4.0, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-SAFE-FAKE16: ; %bb.0:
; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e32 v1, -4.0, v1
; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v1, 4.0, v1
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; CI-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; CI-NSZ: ; %bb.0:
; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-NSZ-NEXT: v_sub_f32_e32 v1, 4.0, v1
; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
; VI-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NSZ-NEXT: v_sub_f16_e32 v1, 4.0, v1
; VI-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
50 changes: 18 additions & 32 deletions llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3277,48 +3277,37 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-LABEL: select_fneg_posk_src_sub_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_or_b32_e32 v2, v2, v3
; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: select_fneg_posk_src_sub_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; VI-NEXT: v_mov_b32_e32 v1, 0xc400
; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_add_f16_e32 v2, -4.0, v2
; VI-NEXT: v_or_b32_e32 v1, v2, v1
; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; VI-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-NEXT: v_mov_b32_e32 v1, 0x4400
; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: select_fneg_posk_src_sub_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
Expand All @@ -3330,28 +3319,25 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
Expand Down
Loading