Skip to content

Commit

Permalink
[AMDGPU] Fix flags of V_MOV_B64_PSEUDO
Browse files Browse the repository at this point in the history
In particular it was not rematerializable.

Differential Revision: https://reviews.llvm.org/D105724
  • Loading branch information
rampitec committed Jul 9, 2021
1 parent 488fcea commit 4a3b055
Show file tree
Hide file tree
Showing 19 changed files with 2,210 additions and 2,216 deletions.
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,13 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
// 64-bit vector move instruction. This is mainly used by the
// SIFoldOperands pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
(ins VSrc_b64:$src0)> {
// Mark the pseudo as rematerializable.
let isReMaterializable = 1;
// Mark the pseudo as being as cheap as a move.
let isAsCheapAsAMove = 1;
// Mark the pseudo as a move-immediate instruction.
let isMoveImm = 1;
// Scheduling write class assigned to this pseudo.
let SchedRW = [Write64Bit];
let Size = 16; // Needs at most two v_mov_b32 instructions, each 8 bytes long.
}

// 64-bit vector move with dpp. Expanded post-RA.
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
Expand Down
907 changes: 452 additions & 455 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll

Large diffs are not rendered by default.

2,096 changes: 1,048 additions & 1,048 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll

Large diffs are not rendered by default.

280 changes: 140 additions & 140 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll

Large diffs are not rendered by default.

390 changes: 195 additions & 195 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/addrspacecast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
Expand Down Expand Up @@ -296,9 +296,9 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32*
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3381,8 +3381,8 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB18_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -3415,8 +3415,8 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB18_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -3448,8 +3448,8 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB18_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -3798,8 +3798,8 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB20_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -3832,8 +3832,8 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB20_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -3865,8 +3865,8 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB20_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -4209,8 +4209,8 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB22_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -4242,8 +4242,8 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB22_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -4275,8 +4275,8 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB22_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -4619,8 +4619,8 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: s_cbranch_execz BB24_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -4652,8 +4652,8 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_cbranch_execz BB24_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1]
Expand Down Expand Up @@ -4685,8 +4685,8 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_cbranch_execz BB24_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,10 @@ define <2 x half> @chain_hi_to_lo_global() {
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: global_load_ushort v0, v[0:1], off
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_load_short_d16_hi v0, v[1:2], off
; GCN-NEXT: global_load_short_d16_hi v0, v[2:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -292,10 +292,10 @@ define <2 x half> @chain_hi_to_lo_flat() {
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_ushort v0, v[0:1]
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_load_short_d16_hi v0, v[1:2]
; GCN-NEXT: flat_load_short_d16_hi v0, v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v0
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT: ds_write_b32 v0, v1 offset:12
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_write_b32 v3, v4 offset:12
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
Expand All @@ -84,11 +84,11 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 0, v0
; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 0, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: ds_write_b32 v3, v2 offset:12
; GFX10-NEXT: ds_write_b32 v2, v3 offset:12
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_div_fmas_f32 v4, s0, s0, s0
; GFX10-NEXT: global_store_dword v[0:1], v4, off
Expand Down Expand Up @@ -315,15 +315,15 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v0, 0x3fb, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_sub_u32_e32 v3, 0x3fb, v0
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT: ds_write2_b32 v0, v1, v3 offset1:1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_write2_b32 v3, v4, v5 offset1:1
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
Expand All @@ -333,12 +333,12 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT: v_mov_b32_e32 v3, 0
; GFX10-NEXT: v_sub_nc_u32_e32 v4, 0x3fb, v0
; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 0x3fb, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: ds_write2_b32 v4, v2, v3 offset1:1
; GFX10-NEXT: ds_write2_b32 v2, v3, v4 offset1:1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_div_fmas_f32 v5, s0, s0, s0
; GFX10-NEXT: global_store_dword v[0:1], v5, off
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX7-NEXT: s_cbranch_vccnz BB1_2
; GFX7-NEXT: ; %bb.1: ; %bb0
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 9
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 9
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: BB1_2: ; %bb1
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 10
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 10
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
Expand All @@ -112,14 +112,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX9-NEXT: s_cbranch_vccnz BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 9
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 9
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: BB1_2: ; %bb1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 10
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 10
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
Expand All @@ -138,14 +138,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX10-NEXT: s_cbranch_vccnz BB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 9
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 9
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: BB1_2: ; %bb1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 10
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 10
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}

; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
Expand All @@ -354,8 +354,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
Expand All @@ -367,8 +367,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_dec_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
Expand All @@ -378,8 +378,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_dec_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
Expand All @@ -201,9 +201,9 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
Expand All @@ -215,8 +215,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
Expand All @@ -227,8 +227,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) n

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
Expand Down
Loading

0 comments on commit 4a3b055

Please sign in to comment.