-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] Fix Xcnt handling between blocks #165201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1288,18 +1288,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) { | |
| } | ||
|
|
||
| void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) { | ||
| // On entry to a block with multiple predescessors, there may | ||
| // be pending SMEM and VMEM events active at the same time. | ||
| // In such cases, only clear one active event at a time. | ||
| auto applyPendingXcntGroup = [this](unsigned E) { | ||
| unsigned LowerBound = getScoreLB(X_CNT); | ||
| applyWaitcnt(X_CNT, 0); | ||
| PendingEvents |= (1 << E); | ||
| setScoreLB(X_CNT, LowerBound); | ||
|
Comment on lines
+1295
to
+1298
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like a very complicated way to write |
||
| }; | ||
|
|
||
| // Wait on XCNT is redundant if we are already waiting for a load to complete. | ||
| // SMEM can return out of order, so only omit XCNT wait if we are waiting till | ||
| // zero. | ||
| if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) | ||
| return applyWaitcnt(X_CNT, 0); | ||
| if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) { | ||
| if (hasPendingEvent(VMEM_GROUP)) | ||
| applyPendingXcntGroup(VMEM_GROUP); | ||
| else | ||
| applyWaitcnt(X_CNT, 0); | ||
| return; | ||
| } | ||
|
|
||
| // If we have pending store we cannot optimize XCnt because we do not wait for | ||
| // stores. VMEM loads retun in order, so if we only have loads XCnt is | ||
| // decremented to the same number as LOADCnt. | ||
| if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) && | ||
| !hasPendingEvent(STORE_CNT)) | ||
| return applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); | ||
| !hasPendingEvent(STORE_CNT)) { | ||
| if (hasPendingEvent(SMEM_GROUP)) | ||
| applyPendingXcntGroup(SMEM_GROUP); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not safe. See #166154. |
||
| else | ||
| applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); | ||
| return; | ||
| } | ||
|
|
||
| applyWaitcnt(X_CNT, Wait.XCnt); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: this really should not have "group" in the name. Xcnt insertion has nothing to do with groups. This goes back to #145566 which added the bad names "SMEM_GROUP" and "VMEM_GROUP".