Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,21 +1291,15 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
// On entry to a block with multiple predecessors, there may
// be pending SMEM and VMEM events active at the same time.
// In such cases, only clear one active event at a time.
// Helper that takes an event index E: it calls applyWaitcnt(X_CNT, 0) and
// then puts E back into PendingEvents with the X_CNT lower bound unchanged.
auto applyPendingXcntGroup = [this](unsigned E) {
// Remember the current X_CNT lower-bound score so it can be restored below.
unsigned LowerBound = getScoreLB(X_CNT);
// NOTE(review): presumably this clears the pending X_CNT events and may move
// the lower bound -- confirm against applyWaitcnt.
applyWaitcnt(X_CNT, 0);
// Mark event E as pending (again) by setting its bit in PendingEvents.
PendingEvents |= (1 << E);
// Put back the lower bound that was saved before the applyWaitcnt call.
setScoreLB(X_CNT, LowerBound);
};

// Wait on XCNT is redundant if we are already waiting for a load to complete.
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
// zero.
if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
if (hasPendingEvent(VMEM_GROUP))
applyPendingXcntGroup(VMEM_GROUP);
else
if (!hasMixedPendingEvents(X_CNT))
applyWaitcnt(X_CNT, 0);
else
PendingEvents &= ~(1 << SMEM_GROUP);
return;
}

Expand All @@ -1314,10 +1308,10 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
// decremented to the same number as LOADCnt.
if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
!hasPendingEvent(STORE_CNT)) {
if (hasPendingEvent(SMEM_GROUP))
applyPendingXcntGroup(SMEM_GROUP);
else
if (!hasMixedPendingEvents(X_CNT))
applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
else if (Wait.LoadCnt == 0)
PendingEvents &= ~(1 << VMEM_GROUP);
return;
}

Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,6 @@ body: |
$sgpr0 = S_MOV_B32 $sgpr0
...

# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
---
name: mixed_pending_events
tracksRegLiveness: true
Expand All @@ -1088,8 +1087,8 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: liveins: $sgpr2, $vgpr2
Expand All @@ -1098,15 +1097,16 @@ body: |
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
liveins: $vgpr0_vgpr1, $sgpr2
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
bb.2:
liveins: $sgpr2, $vgpr2
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
Expand Down