@@ -140,12 +140,11 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
- ; GCN-IR-NEXT: s_mov_b32 s11, 0
+ ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
- ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
- ; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
- ; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
- ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+ ; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+ ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+ ; GCN-IR-NEXT: s_mov_b32 s11, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -202,8 +201,8 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0
; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0
; GCN-IR-NEXT: s_mov_b32 s11, 0xf000
- ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
- ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+ ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+ ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
; GCN-IR-NEXT: s_mov_b32 s10, -1
@@ -505,7 +504,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
; GCN-IR-NEXT: s_mov_b32 s5, s1
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
; GCN-IR-NEXT: s_mov_b32 s4, s0
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -576,7 +575,7 @@ define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
; GCN-IR-NEXT: s_mov_b32 s5, s1
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
; GCN-IR-NEXT: s_mov_b32 s4, s0
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -701,7 +700,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
; GCN-IR-NEXT: s_mov_b32 s5, s1
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
; GCN-IR-NEXT: s_mov_b32 s4, s0
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -839,7 +838,7 @@ define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64
; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
; GCN-IR-NEXT: s_mov_b32 s4, s0
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0
@@ -1021,12 +1020,11 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64
; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
- ; GCN-IR-NEXT: s_mov_b32 s13, 0
+ ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
- ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
- ; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
- ; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
- ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+ ; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+ ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+ ; GCN-IR-NEXT: s_mov_b32 s13, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1174,12 +1172,11 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48
; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
- ; GCN-IR-NEXT: s_mov_b32 s13, 0
+ ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
- ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
- ; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
- ; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
- ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+ ; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+ ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+ ; GCN-IR-NEXT: s_mov_b32 s13, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1376,20 +1373,19 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
; GCN-IR-NEXT: s_sub_u32 s4, s2, s6
; GCN-IR-NEXT: s_subb_u32 s5, s3, s6
- ; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4
- ; GCN-IR-NEXT: s_add_i32 s2, s2, 32
- ; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5
- ; GCN-IR-NEXT: s_min_u32 s8, s2, s3
+ ; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4
+ ; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+ ; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5
+ ; GCN-IR-NEXT: s_min_u32 s8, s6, s7
; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
- ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0
- ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+ ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0
+ ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+ ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+ ; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11]
+ ; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13]
+ ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GCN-IR-NEXT: s_mov_b64 s[2:3], 0
- ; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
- ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
- ; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
- ; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
- ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1993,7 +1989,7 @@ define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %
; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0
; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
@@ -2055,7 +2051,7 @@ define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %
; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4
; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f
- ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+ ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3
; GCN-IR-NEXT: s_mov_b32 s4, s0
; GCN-IR-NEXT: s_mov_b32 s5, s1