Skip to content

Commit

Permalink
[DAG] computeKnownBits - add ISD::MULHS/MULHU/SMUL_LOHI/UMUL_LOHI handling
Browse files Browse the repository at this point in the history

Reuse the existing KnownBits multiplication code to handle the 'extend + multiply + extract high bits' pattern for multiply-high ops.

Noticed while looking at the codegen for D88785 / D98587 - the patch helps division-by-constant expansion code in particular, which suggests that we might have some further KnownBits div/rem cases we could handle - but this was far easier to implement.

Differential Revision: https://reviews.llvm.org/D98857
  • Loading branch information
RKSimon committed Mar 19, 2021
1 parent b8616e4 commit 9d2df96
Show file tree
Hide file tree
Showing 17 changed files with 319 additions and 361 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Expand Up @@ -2979,6 +2979,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::computeForMul(Known, Known2);
break;
}
case ISD::MULHU: {
// Multiply-high (unsigned): the node yields the upper half of the
// zero-extended product. Gather known bits of both operands and reuse
// the KnownBits multiply-high helper rather than re-deriving the
// 'extend + multiply + extract high bits' pattern here.
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = KnownBits::mulhu(Known, Known2);
break;
}
case ISD::MULHS: {
// Multiply-high (signed): same as MULHU but the product is formed from
// sign-extended operands, so use the signed KnownBits helper.
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = KnownBits::mulhs(Known, Known2);
break;
}
case ISD::UMUL_LOHI: {
// UMUL_LOHI produces two results: ResNo 0 is the low half of the
// unsigned widening multiply, ResNo 1 is the high half.
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Op.getResNo() == 0)
// Low half behaves like a plain same-width multiply.
Known = KnownBits::computeForMul(Known, Known2);
else
Known = KnownBits::mulhu(Known, Known2);
break;
}
case ISD::SMUL_LOHI: {
// SMUL_LOHI: as UMUL_LOHI, but the high half (ResNo 1) comes from the
// signed widening multiply, so use the signed helper for it.
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Op.getResNo() == 0)
// Low half is identical for signed and unsigned multiplies.
Known = KnownBits::computeForMul(Known, Known2);
else
Known = KnownBits::mulhs(Known, Known2);
break;
}
case ISD::UDIV: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
Expand Up @@ -609,14 +609,14 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GFX7LESS-NEXT: s_cbranch_execz BB3_2
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX7LESS-NEXT: s_mul_i32 s6, s6, 5
; GFX7LESS-NEXT: s_mov_b32 s10, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_mov_b32 s8, s2
; GFX7LESS-NEXT: s_mov_b32 s9, s3
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
; GFX7LESS-NEXT: s_mul_i32 s3, s2, 5
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s3
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7LESS-NEXT: buffer_atomic_add_x2 v[1:2], off, s[8:11], 0 glc
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -651,12 +651,12 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s8, s2
; GFX89-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
; GFX89-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
; GFX89-NEXT: s_mul_i32 s2, s2, 5
; GFX89-NEXT: s_mov_b32 s11, 0xf000
; GFX89-NEXT: s_mov_b32 s10, -1
; GFX89-NEXT: s_mov_b32 s9, s3
; GFX89-NEXT: v_mov_b32_e32 v1, s2
; GFX89-NEXT: v_mov_b32_e32 v2, 0
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX89-NEXT: buffer_atomic_add_x2 v[1:2], off, s[8:11], 0 glc
; GFX89-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -687,10 +687,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GCN64-NEXT: s_cbranch_execz BB3_2
; GCN64-NEXT: ; %bb.1:
; GCN64-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN64-NEXT: v_mov_b32_e32 v2, 0
; GCN64-NEXT: s_mul_i32 s6, s6, 5
; GCN64-NEXT: s_mov_b32 s11, 0x31016000
; GCN64-NEXT: s_mul_i32 s7, s6, 5
; GCN64-NEXT: v_mul_hi_u32_u24_e64 v2, s6, 5
; GCN64-NEXT: v_mov_b32_e32 v1, s7
; GCN64-NEXT: v_mov_b32_e32 v1, s6
; GCN64-NEXT: s_mov_b32 s10, -1
; GCN64-NEXT: s_waitcnt lgkmcnt(0)
; GCN64-NEXT: s_mov_b32 s8, s2
Expand Down Expand Up @@ -724,10 +724,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GCN32-NEXT: s_cbranch_execz BB3_2
; GCN32-NEXT: ; %bb.1:
; GCN32-NEXT: s_bcnt1_i32_b32 s5, s5
; GCN32-NEXT: v_mov_b32_e32 v2, 0
; GCN32-NEXT: s_mul_i32 s5, s5, 5
; GCN32-NEXT: s_mov_b32 s11, 0x31016000
; GCN32-NEXT: s_mul_i32 s6, s5, 5
; GCN32-NEXT: v_mul_hi_u32_u24_e64 v2, s5, 5
; GCN32-NEXT: v_mov_b32_e32 v1, s6
; GCN32-NEXT: v_mov_b32_e32 v1, s5
; GCN32-NEXT: s_mov_b32 s10, -1
; GCN32-NEXT: s_waitcnt lgkmcnt(0)
; GCN32-NEXT: s_mov_b32 s8, s2
Expand Down Expand Up @@ -1700,14 +1700,14 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GFX7LESS-NEXT: s_cbranch_execz BB9_2
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX7LESS-NEXT: s_mul_i32 s6, s6, 5
; GFX7LESS-NEXT: s_mov_b32 s10, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_mov_b32 s8, s2
; GFX7LESS-NEXT: s_mov_b32 s9, s3
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
; GFX7LESS-NEXT: s_mul_i32 s3, s2, 5
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s3
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1742,12 +1742,12 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s8, s2
; GFX8-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
; GFX8-NEXT: s_mul_i32 s2, s2, 5
; GFX8-NEXT: s_mov_b32 s11, 0xf000
; GFX8-NEXT: s_mov_b32 s10, -1
; GFX8-NEXT: s_mov_b32 s9, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1781,12 +1781,12 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s8, s2
; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
; GFX9-NEXT: s_mul_i32 s2, s2, 5
; GFX9-NEXT: s_mov_b32 s11, 0xf000
; GFX9-NEXT: s_mov_b32 s10, -1
; GFX9-NEXT: s_mov_b32 s9, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1818,10 +1818,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GCN64-NEXT: s_cbranch_execz BB9_2
; GCN64-NEXT: ; %bb.1:
; GCN64-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN64-NEXT: v_mov_b32_e32 v2, 0
; GCN64-NEXT: s_mul_i32 s6, s6, 5
; GCN64-NEXT: s_mov_b32 s11, 0x31016000
; GCN64-NEXT: s_mul_i32 s7, s6, 5
; GCN64-NEXT: v_mul_hi_u32_u24_e64 v2, s6, 5
; GCN64-NEXT: v_mov_b32_e32 v1, s7
; GCN64-NEXT: v_mov_b32_e32 v1, s6
; GCN64-NEXT: s_mov_b32 s10, -1
; GCN64-NEXT: s_waitcnt lgkmcnt(0)
; GCN64-NEXT: s_mov_b32 s8, s2
Expand Down Expand Up @@ -1858,10 +1858,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
; GCN32-NEXT: s_cbranch_execz BB9_2
; GCN32-NEXT: ; %bb.1:
; GCN32-NEXT: s_bcnt1_i32_b32 s5, s5
; GCN32-NEXT: v_mov_b32_e32 v2, 0
; GCN32-NEXT: s_mul_i32 s5, s5, 5
; GCN32-NEXT: s_mov_b32 s11, 0x31016000
; GCN32-NEXT: s_mul_i32 s6, s5, 5
; GCN32-NEXT: v_mul_hi_u32_u24_e64 v2, s5, 5
; GCN32-NEXT: v_mov_b32_e32 v1, s6
; GCN32-NEXT: v_mov_b32_e32 v1, s5
; GCN32-NEXT: s_mov_b32 s10, -1
; GCN32-NEXT: s_waitcnt lgkmcnt(0)
; GCN32-NEXT: s_mov_b32 s8, s2
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
Expand Up @@ -746,10 +746,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: s_cbranch_execz BB4_2
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: s_mul_i32 s5, s4, 5
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4
; GFX7LESS-NEXT: s_mov_b32 m0, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -781,9 +781,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: s_cbranch_execz BB4_2
; GFX8-NEXT: ; %bb.1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX8-NEXT: s_mul_i32 s4, s4, 5
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -815,9 +815,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB4_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX9-NEXT: s_mul_i32 s4, s4, 5
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -848,10 +848,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB4_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_mul_i32 s4, s4, 5
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: s_mul_i32 s5, s4, 5
; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX1064-NEXT: v_mov_b32_e32 v1, s5
; GFX1064-NEXT: v_mov_b32_e32 v1, s4
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -880,10 +880,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB4_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_mul_i32 s3, s3, 5
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: s_mul_i32 s4, s3, 5
; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
; GFX1032-NEXT: v_mov_b32_e32 v1, s4
; GFX1032-NEXT: v_mov_b32_e32 v1, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -1945,10 +1945,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: s_cbranch_execz BB11_2
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: s_mul_i32 s5, s4, 5
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4
; GFX7LESS-NEXT: s_mov_b32 m0, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -1980,9 +1980,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: s_cbranch_execz BB11_2
; GFX8-NEXT: ; %bb.1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX8-NEXT: s_mul_i32 s4, s4, 5
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -2015,9 +2015,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB11_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX9-NEXT: s_mul_i32 s4, s4, 5
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -2049,10 +2049,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB11_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_mul_i32 s4, s4, 5
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: s_mul_i32 s5, s4, 5
; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX1064-NEXT: v_mov_b32_e32 v1, s5
; GFX1064-NEXT: v_mov_b32_e32 v1, s4
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
Expand Down Expand Up @@ -2084,10 +2084,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB11_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_mul_i32 s3, s3, 5
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: s_mul_i32 s4, s3, 5
; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
; GFX1032-NEXT: v_mov_b32_e32 v1, s4
; GFX1032-NEXT: v_mov_b32_e32 v1, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
Expand Down
41 changes: 19 additions & 22 deletions llvm/test/CodeGen/AMDGPU/sdiv64.ll
Expand Up @@ -1202,15 +1202,14 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v6, s[0:1]
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GCN-NEXT: v_mul_hi_u32 v5, 24, v0
; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
; GCN-NEXT: v_mul_hi_u32 v0, 0, v0
; GCN-NEXT: v_mul_hi_u32 v0, 24, v0
; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc
; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v0, vcc
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0, v0
; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
; GCN-NEXT: v_mul_lo_u32 v2, s8, v1
; GCN-NEXT: v_mul_hi_u32 v3, s8, v0
Expand Down Expand Up @@ -1420,15 +1419,14 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
; GCN-NEXT: v_addc_u32_e64 v4, vcc, v4, v6, s[4:5]
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
; GCN-NEXT: v_mul_lo_u32 v5, v4, 24
; GCN-NEXT: v_mul_hi_u32 v7, 24, v4
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
; GCN-NEXT: v_mul_hi_u32 v3, 24, v3
; GCN-NEXT: v_mul_hi_u32 v6, 24, v4
; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; GCN-NEXT: v_addc_u32_e32 v6, vcc, v13, v7, vcc
; GCN-NEXT: v_add_i32_e32 v5, vcc, 0, v5
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v6, vcc
; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
; GCN-NEXT: v_mul_hi_u32 v6, v0, v3
Expand Down Expand Up @@ -1633,15 +1631,14 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GCN-NEXT: s_mov_b32 s4, 0x8000
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
; GCN-NEXT: v_mul_hi_u32 v5, s4, v3
; GCN-NEXT: v_mul_hi_u32 v6, s4, v4
; GCN-NEXT: v_lshlrev_b32_e32 v7, 15, v4
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; GCN-NEXT: v_mul_hi_u32 v3, s4, v3
; GCN-NEXT: v_mul_hi_u32 v5, s4, v4
; GCN-NEXT: v_lshlrev_b32_e32 v6, 15, v4
; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
; GCN-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc
; GCN-NEXT: v_add_i32_e32 v5, vcc, 0, v5
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc
; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
; GCN-NEXT: v_mul_hi_u32 v6, v0, v3
Expand Down

0 comments on commit 9d2df96

Please sign in to comment.