Skip to content

Commit 858fe86

Browse files
committed
Expand Div/Rem: consider the case where the dividend is zero
So we can't use ctlz in poison-producing mode
1 parent 9599393 commit 858fe86

File tree

9 files changed

+123
-137
lines changed

9 files changed

+123
-137
lines changed

llvm/lib/Transforms/Utils/IntegerDivision.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
214214
// ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
215215
// ; %sr = sub nsw i32 %tmp0, %tmp1
216216
// ; %ret0_4 = icmp ugt i32 %sr, 31
217-
// ; %ret0 = or i1 %ret0_3, %ret0_4
217+
// ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4
218218
// ; %retDividend = icmp eq i32 %sr, 31
219219
// ; %retVal = select i1 %ret0, i32 0, i32 %dividend
220-
// ; %earlyRet = or i1 %ret0, %retDividend
220+
// ; %earlyRet = select i1 %ret0, i1 true, i1 %retDividend
221221
// ; br i1 %earlyRet, label %end, label %bb1
222222
Builder.SetInsertPoint(SpecialCases);
223223
Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
@@ -227,10 +227,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
227227
Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
228228
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
229229
Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
230-
Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
230+
Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4);
231231
Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
232232
Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
233-
Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
233+
Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend);
234234
Builder.CreateCondBr(EarlyRet, End, BB1);
235235

236236
// ; bb1: ; preds = %special-cases

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,11 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
168168
; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
169169
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
170170
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
171-
; GCN-IR-NEXT: s_mov_b32 s15, 0
171+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
172172
; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
173-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
174-
; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
175-
; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
176-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
173+
; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
174+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
175+
; GCN-IR-NEXT: s_mov_b32 s15, 0
177176
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
178177
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
179178
; GCN-IR-NEXT: s_add_u32 s16, s10, 1
@@ -524,7 +523,7 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64
524523
; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
525524
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
526525
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
527-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
526+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
528527
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
529528
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
530529
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -694,7 +693,7 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64
694693
; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
695694
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
696695
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
697-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
696+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
698697
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31
699698
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
700699
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -761,7 +760,7 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64
761760
; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
762761
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
763762
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
764-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
763+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
765764
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23
766765
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
767766
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -828,7 +827,7 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64
828827
; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
829828
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
830829
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
831-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
830+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
832831
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25
833832
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
834833
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -910,7 +909,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
910909
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
911910
; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40
912911
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
913-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
912+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
914913
; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10
915914
; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40
916915
; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6
@@ -926,7 +925,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
926925
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2|
927926
; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
928927
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
929-
; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v2, v4
928+
; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2
930929
; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24
931930
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
932931
; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2
@@ -1008,12 +1007,11 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
10081007
; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
10091008
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
10101009
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
1011-
; GCN-IR-NEXT: s_mov_b32 s15, 0
1010+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
10121011
; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
1013-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
1014-
; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
1015-
; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
1016-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
1012+
; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
1013+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
1014+
; GCN-IR-NEXT: s_mov_b32 s15, 0
10171015
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
10181016
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
10191017
; GCN-IR-NEXT: s_add_u32 s16, s10, 1
@@ -1208,20 +1206,19 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
12081206
; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
12091207
; GCN-IR-NEXT: s_sub_u32 s2, s2, s4
12101208
; GCN-IR-NEXT: s_subb_u32 s3, s3, s4
1211-
; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
1212-
; GCN-IR-NEXT: s_add_i32 s6, s6, 32
1213-
; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
1214-
; GCN-IR-NEXT: s_min_u32 s10, s6, s7
1209+
; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2
1210+
; GCN-IR-NEXT: s_add_i32 s8, s8, 32
1211+
; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3
1212+
; GCN-IR-NEXT: s_min_u32 s10, s8, s9
12151213
; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5
12161214
; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
1217-
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0
1218-
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[8:9], 63
1215+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
1216+
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63
1217+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63
1218+
; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
1219+
; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15]
1220+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7]
12191221
; GCN-IR-NEXT: s_mov_b64 s[6:7], 0
1220-
; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
1221-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[14:15], s[8:9], 63
1222-
; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1
1223-
; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15]
1224-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15]
12251222
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
12261223
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
12271224
; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -1823,7 +1820,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %
18231820
; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
18241821
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
18251822
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
1826-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1823+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
18271824
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
18281825
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
18291826
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1880,7 +1877,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %
18801877
; GCN-IR-NEXT: v_mov_b32_e32 v2, s0
18811878
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8
18821879
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
1883-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1880+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
18841881
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
18851882
; GCN-IR-NEXT: s_mov_b32 s5, s1
18861883
; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0

llvm/test/CodeGen/AMDGPU/srem64.ll

Lines changed: 30 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,11 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
140140
; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
141141
; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
142142
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
143-
; GCN-IR-NEXT: s_mov_b32 s11, 0
143+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
144144
; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
145-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
146-
; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
147-
; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
148-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
145+
; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
146+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
147+
; GCN-IR-NEXT: s_mov_b32 s11, 0
149148
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
150149
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
151150
; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -202,8 +201,8 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
202201
; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0
203202
; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0
204203
; GCN-IR-NEXT: s_mov_b32 s11, 0xf000
205-
; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
206-
; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3
204+
; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
205+
; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1
207206
; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
208207
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
209208
; GCN-IR-NEXT: s_mov_b32 s10, -1
@@ -505,7 +504,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64
505504
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
506505
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
507506
; GCN-IR-NEXT: s_mov_b32 s5, s1
508-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
507+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
509508
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
510509
; GCN-IR-NEXT: s_mov_b32 s4, s0
511510
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -576,7 +575,7 @@ define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64
576575
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
577576
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
578577
; GCN-IR-NEXT: s_mov_b32 s5, s1
579-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
578+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
580579
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
581580
; GCN-IR-NEXT: s_mov_b32 s4, s0
582581
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -701,7 +700,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64
701700
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
702701
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
703702
; GCN-IR-NEXT: s_mov_b32 s5, s1
704-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
703+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
705704
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
706705
; GCN-IR-NEXT: s_mov_b32 s4, s0
707706
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -839,7 +838,7 @@ define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64
839838
; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2
840839
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
841840
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
842-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
841+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
843842
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
844843
; GCN-IR-NEXT: s_mov_b32 s4, s0
845844
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0
@@ -1021,12 +1020,11 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64
10211020
; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
10221021
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
10231022
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
1024-
; GCN-IR-NEXT: s_mov_b32 s13, 0
1023+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
10251024
; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
1026-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
1027-
; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
1028-
; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
1029-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
1025+
; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
1026+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
1027+
; GCN-IR-NEXT: s_mov_b32 s13, 0
10301028
; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
10311029
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
10321030
; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1174,12 +1172,11 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48
11741172
; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
11751173
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
11761174
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
1177-
; GCN-IR-NEXT: s_mov_b32 s13, 0
1175+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
11781176
; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
1179-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
1180-
; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
1181-
; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
1182-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
1177+
; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
1178+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
1179+
; GCN-IR-NEXT: s_mov_b32 s13, 0
11831180
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
11841181
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
11851182
; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1376,20 +1373,19 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
13761373
; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
13771374
; GCN-IR-NEXT: s_sub_u32 s4, s2, s6
13781375
; GCN-IR-NEXT: s_subb_u32 s5, s3, s6
1379-
; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4
1380-
; GCN-IR-NEXT: s_add_i32 s2, s2, 32
1381-
; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5
1382-
; GCN-IR-NEXT: s_min_u32 s8, s2, s3
1376+
; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4
1377+
; GCN-IR-NEXT: s_add_i32 s6, s6, 32
1378+
; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5
1379+
; GCN-IR-NEXT: s_min_u32 s8, s6, s7
13831380
; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
13841381
; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
1385-
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0
1386-
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
1382+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0
1383+
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
1384+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
1385+
; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11]
1386+
; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13]
1387+
; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3]
13871388
; GCN-IR-NEXT: s_mov_b64 s[2:3], 0
1388-
; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
1389-
; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
1390-
; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
1391-
; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
1392-
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
13931389
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
13941390
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
13951391
; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1993,7 +1989,7 @@ define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %
19931989
; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
19941990
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
19951991
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
1996-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1992+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
19971993
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
19981994
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0
19991995
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
@@ -2055,7 +2051,7 @@ define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %
20552051
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4
20562052
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
20572053
; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f
2058-
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2054+
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
20592055
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3
20602056
; GCN-IR-NEXT: s_mov_b32 s4, s0
20612057
; GCN-IR-NEXT: s_mov_b32 s5, s1

0 commit comments

Comments
 (0)