Expand Div/Rem: consider the case where the dividend is zero
So we can't use ctlz in poison-producing mode
nunoplopes committed Sep 1, 2022
1 parent 9599393 commit 858fe86
Showing 9 changed files with 123 additions and 137 deletions.
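A note on the fix itself: the expansion calls @llvm.ctlz with is_zero_poison set to true, so when the dividend is zero the intrinsic returns poison, and everything computed from it (the shift amount %sr and the comparisons on it) is poison as well. A bitwise or of the special-case flags then yields poison even though the dividend == 0 case is already caught by %ret0_3. A logical or, written as a select, is immune: when its condition is true it never depends on the poisoned operand. A minimal IR sketch of the difference, reusing the value names from the diff below:

    %tmp1   = call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) ; poison when %dividend == 0
    %sr     = sub i32 %tmp0, %tmp1                            ; poison propagates
    %ret0_4 = icmp ugt i32 %sr, 31                            ; still poison
    %bad    = or i1 %ret0_3, %ret0_4                 ; poison even when %ret0_3 is true
    %good   = select i1 %ret0_3, i1 true, i1 %ret0_4 ; well-defined true when %ret0_3 is true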
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -214,10 +214,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
   // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
   // ; %sr = sub nsw i32 %tmp0, %tmp1
   // ; %ret0_4 = icmp ugt i32 %sr, 31
-  // ; %ret0 = or i1 %ret0_3, %ret0_4
+  // ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4
   // ; %retDividend = icmp eq i32 %sr, 31
   // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
-  // ; %earlyRet = or i1 %ret0, %retDividend
+  // ; %earlyRet = select i1 %ret0, i1 true, i1 %retDividend
   // ; br i1 %earlyRet, label %end, label %bb1
   Builder.SetInsertPoint(SpecialCases);
   Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
@@ -227,10 +227,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
   Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
   Value *SR = Builder.CreateSub(Tmp0, Tmp1);
   Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
-  Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+  Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4);
   Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
   Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
-  Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+  Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend);
   Builder.CreateCondBr(EarlyRet, End, BB1);

   // ; bb1: ; preds = %special-cases
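For reference, CreateLogicalOr is the existing IRBuilder helper for the select-based or; no new API is introduced here. A sketch of the IR each builder call emits (ignoring constant folding):

    ; Builder.CreateOr(Ret0_3, Ret0_4) emits a bitwise or:
    %ret0 = or i1 %ret0_3, %ret0_4
    ; Builder.CreateLogicalOr(Ret0_3, Ret0_4) emits the short-circuiting form:
    %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4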
55 changes: 26 additions & 29 deletions llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -168,12 +168,11 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s15, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
-; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_mov_b32 s15, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1
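The remaining test churn follows mechanically from the new expansion: the branch condition is unchanged as a Boolean function, but the select-based IR lets the backend build the early-exit mask directly and branch on its complement (s_or_b64 plus s_andn2_b64 above, instead of the old v_cmp_ne/s_xor_b64/s_and_b64 sequence). A sketch of the identity, with hypothetical i1 values standing in for the scalar condition registers:

    ; old: continue into the division loop when !ret0 & (sr != 63)
    %not63 = icmp ne i64 %sr, 63
    %inv   = xor i1 %ret0, true
    %old   = and i1 %inv, %not63
    ; new: build the early-exit mask, then complement it (the andn2)
    %is63  = icmp eq i64 %sr, 63
    %exit  = or i1 %ret0, %is63
    %new   = xor i1 %exit, true
    ; equal by De Morgan: !(a | b) == !a & !b, and !(sr == 63) == (sr != 63)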
@@ -524,7 +523,7 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -694,7 +693,7 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -761,7 +760,7 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -828,7 +827,7 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -910,7 +909,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10
 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6
@@ -926,7 +925,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
-; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2
 ; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2
@@ -1008,12 +1007,11 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s15, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
-; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_mov_b32 s15, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1
@@ -1208,20 +1206,19 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4
 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4
-; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
-; GCN-IR-NEXT: s_add_i32 s6, s6, 32
-; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
-; GCN-IR-NEXT: s_min_u32 s10, s6, s7
+; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2
+; GCN-IR-NEXT: s_add_i32 s8, s8, 32
+; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3
+; GCN-IR-NEXT: s_min_u32 s10, s8, s9
 ; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[8:9], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63
+; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7]
 ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0
-; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[14:15], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -1823,7 +1820,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1880,7 +1877,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s0
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
64 changes: 30 additions & 34 deletions llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -140,12 +140,11 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_mov_b32 s11, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_mov_b32 s11, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -202,8 +201,8 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0
 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
 ; GCN-IR-NEXT: s_mov_b32 s10, -1
@@ -505,7 +504,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -576,7 +575,7 @@ define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -701,7 +700,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -839,7 +838,7 @@ define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0
@@ -1021,12 +1020,11 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s13, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
-; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_mov_b32 s13, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1174,12 +1172,11 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s13, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
-; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_mov_b32 s13, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1376,20 +1373,19 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
 ; GCN-IR-NEXT: s_sub_u32 s4, s2, s6
 ; GCN-IR-NEXT: s_subb_u32 s5, s3, s6
-; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4
-; GCN-IR-NEXT: s_add_i32 s2, s2, 32
-; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5
-; GCN-IR-NEXT: s_min_u32 s8, s2, s3
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5
+; GCN-IR-NEXT: s_min_u32 s8, s6, s7
 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11]
+; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3]
 ; GCN-IR-NEXT: s_mov_b64 s[2:3], 0
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1993,7 +1989,7 @@ define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
@@ -2055,7 +2051,7 @@ define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
