Skip to content

Commit

Permalink
AMDGPU: Push fneg into bitcast of integer select
Browse files Browse the repository at this point in the history
Avoids some regressions in the math libraries in a future
patch.
  • Loading branch information
arsenm committed Apr 12, 2023
1 parent 6d3b779 commit f608ac6
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 80 deletions.
28 changes: 25 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,9 +596,12 @@ static bool fnegFoldsIntoOp(const SDNode *N) {
// TODO: Is there a benefit to checking the conditions performFNegCombine
// does? We don't for the other cases.
SDValue BCSrc = N->getOperand(0);
return BCSrc.getOpcode() == ISD::BUILD_VECTOR &&
BCSrc.getNumOperands() == 2 &&
BCSrc.getOperand(1).getValueSizeInBits() == 32;
if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
return BCSrc.getNumOperands() == 2 &&
BCSrc.getOperand(1).getValueSizeInBits() == 32;
}

return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32;
}

return fnegFoldsIntoOpcode(Opc);
Expand Down Expand Up @@ -4182,6 +4185,25 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
return Result;
}

if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32) {
// fneg (f32 (bitcast (select cond, i32:lhs, i32:rhs))) ->
// select cond, (fneg (bitcast i32:lhs to f32)),
//              (fneg (bitcast i32:rhs to f32))
SDValue LHS =
DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1));
SDValue RHS =
DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2));

SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);

SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, MVT::f32,
BCSrc.getOperand(0), NegLHS, NegRHS);
if (!BCSrc.hasOneUse())
DAG.ReplaceAllUsesWith(BCSrc,
DAG.getNode(ISD::FNEG, SL, VT, NewSelect));
return NewSelect;
}

return SDValue();
}
default:
Expand Down
77 changes: 37 additions & 40 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3023,16 +3023,17 @@ define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT: v_bfrev_b32_e32 v0, 1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s4, 1, s4
; SI-NEXT: s_cselect_b32 s3, 0, s3
; SI-NEXT: s_xor_b32 s3, s3, 0x80000000
; SI-NEXT: s_cmp_eq_u32 s4, 1
; SI-NEXT: s_bitcmp1_b32 s4, 0
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: s_and_b64 s[6:7], s[4:5], exec
; SI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
; SI-NEXT: s_cselect_b32 s2, 0, s2
; SI-NEXT: s_cselect_b32 s3, 0, s3
; SI-NEXT: v_mov_b32_e32 v3, s1
; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: v_mov_b32_e32 v2, s0
; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT: s_endpgm
Expand All @@ -3042,16 +3043,17 @@ define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: v_bfrev_b32_e32 v0, 1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s4, 1, s4
; VI-NEXT: s_cselect_b32 s3, 0, s3
; VI-NEXT: s_xor_b32 s3, s3, 0x80000000
; VI-NEXT: s_cmp_eq_u32 s4, 1
; VI-NEXT: s_bitcmp1_b32 s4, 0
; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: s_and_b64 s[6:7], s[4:5], exec
; VI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
; VI-NEXT: s_cselect_b32 s2, 0, s2
; VI-NEXT: s_cselect_b32 s3, 0, s3
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5]
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
Expand All @@ -3067,9 +3069,9 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_bfrev_b32_e32 v3, 1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -3221,19 +3223,17 @@ define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %a
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT: v_bfrev_b32_e32 v0, 1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s4, 1, s4
; SI-NEXT: s_cselect_b32 s2, 0, s2
; SI-NEXT: s_xor_b32 s2, s2, 0x80000000
; SI-NEXT: s_cmp_eq_u32 s4, 1
; SI-NEXT: s_cselect_b32 s3, 0, s3
; SI-NEXT: s_cselect_b32 s2, 0, s2
; SI-NEXT: s_xor_b32 s3, s3, 0x80000000
; SI-NEXT: s_cmp_eq_u32 s4, 1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_cselect_b32 s2, 0, s3
; SI-NEXT: v_mov_b32_e32 v3, s1
; SI-NEXT: s_bitcmp1_b32 s4, 0
; SI-NEXT: v_mov_b32_e32 v1, s2
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: v_cndmask_b32_e64 v2, -v1, v0, s[4:5]
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5]
; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[4:5]
; SI-NEXT: v_mov_b32_e32 v3, s1
; SI-NEXT: v_mov_b32_e32 v2, s0
; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; SI-NEXT: s_endpgm
Expand All @@ -3243,19 +3243,17 @@ define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %a
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: v_bfrev_b32_e32 v0, 1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s4, 1, s4
; VI-NEXT: s_cselect_b32 s2, 0, s2
; VI-NEXT: s_xor_b32 s2, s2, 0x80000000
; VI-NEXT: s_cmp_eq_u32 s4, 1
; VI-NEXT: s_cselect_b32 s3, 0, s3
; VI-NEXT: s_cselect_b32 s2, 0, s2
; VI-NEXT: s_xor_b32 s3, s3, 0x80000000
; VI-NEXT: s_cmp_eq_u32 s4, 1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_cselect_b32 s2, 0, s3
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: s_bitcmp1_b32 s4, 0
; VI-NEXT: v_mov_b32_e32 v1, s2
; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
; VI-NEXT: v_cndmask_b32_e64 v2, -v1, v0, s[4:5]
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5]
; VI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[4:5]
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
Expand All @@ -3271,11 +3269,10 @@ define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_bfrev_b32_e32 v3, 1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v3, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
Expand Down
67 changes: 30 additions & 37 deletions llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
Original file line number Diff line number Diff line change
Expand Up @@ -398,19 +398,18 @@ define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) {
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_xor_select_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v1 :: v_dual_cndmask_b32 v1, v4, v2
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%select = select i1 %cond, double %arg0, double %arg1
%fneg = fneg double %select
Expand All @@ -422,38 +421,38 @@ define double @fneg_xor_select_f64_multi_user(i1 %cond, double %arg0, double %ar
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
; GFX7-NEXT: v_mov_b32_e32 v7, v1
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
; GFX7-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1]
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX7-NEXT: v_mov_b32_e32 v1, v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_xor_select_f64_multi_user:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_mov_b32_e32 v7, v1
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_xor_select_f64_multi_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, v2
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down Expand Up @@ -497,14 +496,13 @@ define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, d
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, -v3, -v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, v2, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: select_fneg_select_fneg_f64:
Expand All @@ -513,16 +511,13 @@ define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, d
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v2, -v3, -v5, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, -v2, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg0 = fneg double %arg0
%select0 = select i1 %cond0, double %arg1, double %fneg0
Expand Down Expand Up @@ -894,10 +889,9 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
; GCN-NEXT: v_and_b32_e32 v5, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, -v4, vcc
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v1, vcc
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -909,12 +903,11 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, -v2, -v4, vcc_lo
; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_mov_b32_e32 v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v1, -v1, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%i = and i32 %arg, 1
%i3 = icmp eq i32 %i, 0
Expand Down

0 comments on commit f608ac6

Please sign in to comment.