Skip to content

Commit

Permalink
[MachineSink] Allow sinking of constant or ignorable physreg uses
Browse files Browse the repository at this point in the history
For AMDGPU, any use of the physical register EXEC prevents sinking even if it is not a real physical register read. Add a check to see if a physical register use can be ignored for sinking.

Also perform the same constant and ignorable physical register check when considering sinking in loops.

https://reviews.llvm.org/D116053
  • Loading branch information
vangthao95 committed Jan 18, 2022
1 parent 67ac3f1 commit 10ed1ec
Show file tree
Hide file tree
Showing 11 changed files with 1,280 additions and 535 deletions.
2 changes: 1 addition & 1 deletion llvm/include/llvm/CodeGen/TargetInstrInfo.h
Expand Up @@ -130,7 +130,7 @@ class TargetInstrInfo : public MCInstrInfo {
}

/// Given \p MO is a PhysReg use return if it can be ignored for the purpose
/// of instruction rematerialization or sinking.
///
/// The default implementation conservatively reports that no physical
/// register use is ignorable; targets override this to let passes such as
/// MachineSink and rematerialization treat specific physreg uses (e.g.
/// AMDGPU's EXEC, which is not a real data read) as side-effect free.
virtual bool isIgnorableUse(const MachineOperand &MO) const {
return false;
}
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/CodeGen/MachineSink.cpp
Expand Up @@ -796,9 +796,14 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (Reg == 0)
continue;

// Don't handle physical register.
if (Register::isPhysicalRegister(Reg))
if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse() &&
(MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
continue;

// Don't handle non-constant and non-ignorable physical register.
return false;
}

// Users for the defs are all dominated by SuccToSinkTo.
if (MO.isDef()) {
Expand Down Expand Up @@ -898,7 +903,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
if (!MRI->isConstantPhysReg(Reg))
if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
return nullptr;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
Expand Down
220 changes: 110 additions & 110 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
Expand Up @@ -2962,19 +2962,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b64 s[4:5], 0x1000
; CGP-NEXT: s_mov_b64 s[6:7], 0x1000
; CGP-NEXT: v_mov_b32_e32 v5, v2
; CGP-NEXT: v_mov_b32_e32 v7, v3
; CGP-NEXT: v_lshl_b64 v[2:3], s[4:5], v4
; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4
; CGP-NEXT: v_mov_b32_e32 v9, v1
; CGP-NEXT: v_mov_b32_e32 v8, v0
; CGP-NEXT: v_or_b32_e32 v1, v9, v3
; CGP-NEXT: v_mov_b32_e32 v0, 0
; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v6
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; CGP-NEXT: s_cbranch_execz .LBB8_2
; CGP-NEXT: ; %bb.1:
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3
Expand All @@ -2984,134 +2983,134 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_xor_b32_e32 v2, v2, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v9
; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v6
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v6, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc
; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v3
; CGP-NEXT: v_trunc_f32_e32 v9, v9
; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v9
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v1
; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v2, vcc
; CGP-NEXT: v_mul_lo_u32 v14, v13, v3
; CGP-NEXT: v_mul_lo_u32 v15, v12, v9
; CGP-NEXT: v_mul_hi_u32 v17, v12, v3
; CGP-NEXT: v_mul_lo_u32 v16, v12, v3
; CGP-NEXT: v_xor_b32_e32 v4, v4, v6
; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v1
; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v2, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v12, v3
; CGP-NEXT: v_mul_lo_u32 v14, v11, v9
; CGP-NEXT: v_mul_hi_u32 v16, v11, v3
; CGP-NEXT: v_mul_lo_u32 v15, v11, v3
; CGP-NEXT: v_xor_b32_e32 v4, v4, v10
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; CGP-NEXT: v_mul_lo_u32 v14, v9, v15
; CGP-NEXT: v_mul_lo_u32 v16, v3, v13
; CGP-NEXT: v_mul_hi_u32 v17, v3, v15
; CGP-NEXT: v_mul_hi_u32 v15, v9, v15
; CGP-NEXT: v_xor_b32_e32 v8, v8, v10
; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
; CGP-NEXT: v_mul_lo_u32 v15, v9, v16
; CGP-NEXT: v_mul_lo_u32 v17, v3, v14
; CGP-NEXT: v_mul_hi_u32 v18, v3, v16
; CGP-NEXT: v_mul_hi_u32 v16, v9, v16
; CGP-NEXT: v_xor_b32_e32 v8, v8, v6
; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v18, v9, v14
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v17, v9, v13
; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
; CGP-NEXT: v_mul_hi_u32 v16, v3, v13
; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15
; CGP-NEXT: v_mul_hi_u32 v17, v3, v14
; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v16
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17
; CGP-NEXT: v_mul_hi_u32 v14, v9, v14
; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v13, v3
; CGP-NEXT: v_mul_lo_u32 v14, v12, v9
; CGP-NEXT: v_mul_lo_u32 v15, v12, v3
; CGP-NEXT: v_mul_hi_u32 v12, v12, v3
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
; CGP-NEXT: v_mul_lo_u32 v13, v9, v15
; CGP-NEXT: v_mul_lo_u32 v14, v3, v12
; CGP-NEXT: v_mul_hi_u32 v16, v3, v15
; CGP-NEXT: v_mul_hi_u32 v15, v9, v15
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v16, v9, v12
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_mul_hi_u32 v14, v3, v12
; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; CGP-NEXT: v_mul_hi_u32 v13, v9, v13
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
; CGP-NEXT: v_mul_hi_u32 v12, v9, v12
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc
; CGP-NEXT: v_mul_lo_u32 v12, v8, v3
; CGP-NEXT: v_mul_lo_u32 v13, v4, v9
; CGP-NEXT: v_mul_hi_u32 v14, v4, v3
; CGP-NEXT: v_mul_hi_u32 v3, v8, v3
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
; CGP-NEXT: v_mul_lo_u32 v12, v12, v3
; CGP-NEXT: v_mul_lo_u32 v13, v11, v9
; CGP-NEXT: v_mul_lo_u32 v14, v11, v3
; CGP-NEXT: v_mul_hi_u32 v11, v11, v3
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_mul_lo_u32 v12, v9, v14
; CGP-NEXT: v_mul_lo_u32 v13, v3, v11
; CGP-NEXT: v_mul_hi_u32 v15, v3, v14
; CGP-NEXT: v_mul_hi_u32 v14, v9, v14
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v14, v8, v9
; CGP-NEXT: v_mul_lo_u32 v15, v9, v11
; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
; CGP-NEXT: v_mul_hi_u32 v13, v4, v9
; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
; CGP-NEXT: v_mul_hi_u32 v13, v3, v11
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
; CGP-NEXT: v_mul_hi_u32 v11, v9, v11
; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc
; CGP-NEXT: v_mul_lo_u32 v11, v8, v3
; CGP-NEXT: v_mul_lo_u32 v12, v4, v9
; CGP-NEXT: v_mul_hi_u32 v13, v4, v3
; CGP-NEXT: v_mul_hi_u32 v3, v8, v3
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_mul_hi_u32 v12, v4, v9
; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v12, v2, v3
; CGP-NEXT: v_mul_lo_u32 v13, v1, v9
; CGP-NEXT: v_mul_hi_u32 v15, v1, v3
; CGP-NEXT: v_mul_lo_u32 v14, v1, v3
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v14
; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v8, v12, vcc
; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v12
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v2
; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; CGP-NEXT: v_mul_lo_u32 v11, v2, v3
; CGP-NEXT: v_mul_lo_u32 v12, v1, v9
; CGP-NEXT: v_mul_hi_u32 v14, v1, v3
; CGP-NEXT: v_mul_lo_u32 v13, v1, v3
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v13
; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v8, v11, vcc
; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v11
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v2
; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v2, vcc
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v1
; CGP-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v13, v2
; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v3
; CGP-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[4:5]
; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v9, vcc
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v2
; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v3
; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v13, s[4:5]
; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v13
; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v14, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v12
; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v4, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v4, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; CGP-NEXT: v_xor_b32_e32 v3, v6, v0
; CGP-NEXT: v_xor_b32_e32 v3, v10, v0
; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v1, v3
; CGP-NEXT: v_xor_b32_e32 v1, v2, v3
Expand All @@ -3120,8 +3119,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
; CGP-NEXT: ; implicit-def: $vgpr8
; CGP-NEXT: .LBB8_2: ; %Flow2
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
; CGP-NEXT: s_cbranch_execz .LBB8_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
Expand All @@ -3145,18 +3145,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: .LBB8_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: v_or_b32_e32 v3, v7, v11
; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
; CGP-NEXT: v_or_b32_e32 v3, v7, v10
; CGP-NEXT: v_mov_b32_e32 v2, 0
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
; CGP-NEXT: s_cbranch_execz .LBB8_6
; CGP-NEXT: ; %bb.5:
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v11
; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v2
; CGP-NEXT: v_addc_u32_e32 v4, vcc, v11, v2, vcc
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10
; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2
; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc
; CGP-NEXT: v_xor_b32_e32 v3, v3, v2
; CGP-NEXT: v_xor_b32_e32 v4, v4, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
Expand Down Expand Up @@ -3294,31 +3294,31 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_xor_b32_e32 v3, v4, v5
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
; CGP-NEXT: ; implicit-def: $vgpr5
; CGP-NEXT: .LBB8_6: ; %Flow
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz .LBB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
; CGP-NEXT: v_mul_lo_u32 v3, v2, v10
; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v10
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v3, 0
; CGP-NEXT: .LBB8_8:
Expand Down

0 comments on commit 10ed1ec

Please sign in to comment.