-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][SIInsertWaitcnts] Set initial state for VS_CNT in non-kernel functions #75436
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) Changes: Split from #72830. Patch is 27.74 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/75436.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c16583f6a7f9ac..dfe67f4c189540 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,6 +292,11 @@ class WaitcntBrackets {
VgprVmemTypes[GprNo] = 0;
}
+ void setNonKernelFunctionInitialState() {
+ setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
+ PendingEvents |= WaitEventMaskForInst[VS_CNT];
+ }
+
void print(raw_ostream &);
void dump() { print(dbgs()); }
@@ -1865,6 +1870,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
+ auto NonKernelInitialState =
+ std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
+ NonKernelInitialState->setNonKernelFunctionInitialState();
+ BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
+
Modified = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 1ccd31e97a2aba..36ddd286155a34 100644
--- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -55,6 +55,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: s_barrier
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index 6fd6d6e2e31a1c..65b70587fa0ace 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -17,8 +17,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GFX11-NEXT: global_store_b32 v[8:9], v0, off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg:
@@ -39,8 +37,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX11_W64-NEXT: s_not_b64 exec, exec
; GFX11_W64-NEXT: global_store_b32 v[8:9], v0, off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg:
@@ -68,8 +64,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
; GFX11-NEXT: v_mov_b32_e32 v1, v11
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GFX11-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg_64:
@@ -94,8 +88,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
; GFX11_W64-NEXT: v_mov_b32_e32 v1, v11
; GFX11_W64-NEXT: s_not_b64 exec, exec
; GFX11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg_64:
@@ -133,8 +125,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: global_store_b32 v[8:9], v2, off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg_dpp:
@@ -174,8 +164,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11_W64-NEXT: v_mov_b32_e32 v2, v1
; GFX11_W64-NEXT: global_store_b32 v[8:9], v2, off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg_dpp:
@@ -233,8 +221,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT: s_nop 0
-; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_call:
@@ -265,8 +251,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT: s_nop 0
-; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_call:
@@ -380,8 +364,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT: s_nop 0
-; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_call:
@@ -419,8 +401,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT: s_nop 0
-; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_call:
@@ -538,8 +518,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT: s_nop 0
-; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -570,8 +548,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT: s_nop 0
-; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -685,8 +661,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT: s_nop 0
-; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -724,8 +698,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT: s_nop 0
-; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 3a879e818af797..e57fc0311bd3c6 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,OPT
-# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,NOOPT
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
--- |
define amdgpu_ps void @tbuffer_store1() { ret void }
@@ -28,17 +28,10 @@
name: tbuffer_store1
body: |
bb.0:
- ; OPT-LABEL: name: tbuffer_store1
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: tbuffer_store1
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: tbuffer_store1
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -47,17 +40,10 @@ body: |
name: tbuffer_store2
body: |
bb.0:
- ; OPT-LABEL: name: tbuffer_store2
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: tbuffer_store2
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: tbuffer_store2
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+ ; CHECK-NEXT: S_ENDPGM 0
TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
S_ENDPGM 0
...
@@ -78,19 +64,11 @@ body: |
name: global_store
body: |
bb.0:
- ; OPT-LABEL: name: global_store
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: global_store
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: global_store
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; CHECK-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_ENDPGM 0
@@ -100,17 +78,10 @@ body: |
name: buffer_store_format
body: |
bb.0:
- ; OPT-LABEL: name: buffer_store_format
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: buffer_store_format
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: buffer_store_format
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -137,23 +108,13 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
- ; OPT-LABEL: name: global_store_dword
- ; OPT: liveins: $vgpr0, $sgpr0_sgpr1
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: global_store_dword
- ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: global_store_dword
+ ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
S_ENDPGM 0
@@ -206,45 +167,24 @@ body: |
---
name: multiple_basic_blocks2
body: |
- ; OPT-LABEL: name: multiple_basic_blocks2
- ; OPT: bb.0:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.1:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.2:
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: multiple_basic_blocks2
- ; NOOPT: bb.0:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.1:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.2:
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: multiple_basic_blocks2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.2
@@ -268,65 +208,34 @@ body: |
---
name: multiple_basic_blocks3
body: |
- ; OPT-LABEL: name: multiple_basic_blocks3
- ; OPT: bb.0:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.1:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.2:
- ; OPT-NEXT: successors: %bb.4(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_BRANCH %bb.4
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.3:
- ; OPT-NEXT: successors: %bb.4(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.4
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.4:
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: multiple_basic_blocks3
- ; NOOPT: bb.0:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NO...
[truncated]
|
@@ -55,6 +55,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 { | |||
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |||
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1] | |||
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |||
; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this is a bug fix. @kerbowa would you agree? The code in generateWaitcntInstBefore certainly looks like it's trying to wait for all counters to be zero. So the bug was that previously, this pass wrongly assumed that vs_cnt would be zero on entry to a non-kernel function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
@@ -17,8 +17,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 % | |||
; GFX11-NEXT: v_mov_b32_e32 v0, v10 | |||
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo | |||
; GFX11-NEXT: global_store_b32 v[8:9], v0, off | |||
; GFX11-NEXT: s_nop 0 | |||
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is fixing the bug explained in #72245.
Split from #72830