-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Remove redundant s_cmp_lg_* sX, 0 #162352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e65436c
050ff96
dca3d5a
272942b
dc14a4f
779f4aa
39380bb
cae0fb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10577,6 +10577,67 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue)) | ||
return false; | ||
|
||
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, | ||
this]() -> bool { | ||
if (CmpValue != 0) | ||
return false; | ||
|
||
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); | ||
if (!Def || Def->getParent() != CmpInstr.getParent()) | ||
return false; | ||
|
||
bool CanOptimize = false; | ||
MachineOperand *SccDef = | ||
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr); | ||
|
||
// For S_OP that set SCC = DST!=0, do the transformation | ||
// | ||
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...) | ||
if (SccDef && Def->getOpcode() != AMDGPU::S_ADD_I32 && | ||
Def->getOpcode() != AMDGPU::S_ADD_U32 && | ||
Def->getOpcode() != AMDGPU::S_ADDC_U32 && | ||
Def->getOpcode() != AMDGPU::S_SUB_I32 && | ||
Def->getOpcode() != AMDGPU::S_SUB_U32 && | ||
Def->getOpcode() != AMDGPU::S_SUBB_U32 && | ||
Def->getOpcode() != AMDGPU::S_MIN_I32 && | ||
Def->getOpcode() != AMDGPU::S_MIN_U32 && | ||
Def->getOpcode() != AMDGPU::S_MAX_I32 && | ||
Def->getOpcode() != AMDGPU::S_MAX_U32 && | ||
Def->getOpcode() != AMDGPU::S_ADDK_I32) | ||
CanOptimize = true; | ||
|
||
// s_cmp_lg_* is redundant because the SCC input value for S_CSELECT* has | ||
// the same value that will be calculated by s_cmp_lg_* | ||
// | ||
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero | ||
// imm), 0) | ||
if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 || | ||
Def->getOpcode() == AMDGPU::S_CSELECT_B64) { | ||
bool Op1IsNonZeroImm = | ||
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0; | ||
bool Op2IsZeroImm = | ||
Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0; | ||
if (Op1IsNonZeroImm && Op2IsZeroImm) | ||
CanOptimize = true; | ||
} | ||
|
||
if (!CanOptimize) | ||
return false; | ||
|
||
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); | ||
I != E; ++I) { | ||
if (I->modifiesRegister(AMDGPU::SCC, &RI) || | ||
I->killsRegister(AMDGPU::SCC, &RI)) | ||
return false; | ||
} | ||
Comment on lines
+10627
to
+10632
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think you should need to scan around for physreg liveness; I don't see other targets doing that here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Scanning for register liveness is also done in the other lambda, optimizeCmpAnd (line 10752), that is used in optimizeCompareInstr. It is necessary to prevent incorrectly deleting an s_cmp if there is an intervening definition. In PR #161582, tests were added that require this scanning. PR #161582 was done to add SCC as an implicit destination of s_quadmask*. Without this fix the scanning would not detect a conflict. |
||
|
||
if (SccDef) | ||
SccDef->setIsDead(false); | ||
|
||
CmpInstr.eraseFromParent(); | ||
return true; | ||
}; | ||
|
||
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, | ||
this](int64_t ExpectedValue, unsigned SrcSize, | ||
bool IsReversible, bool IsSigned) -> bool { | ||
|
@@ -10704,15 +10765,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
case AMDGPU::S_CMP_LG_I32: | ||
case AMDGPU::S_CMPK_LG_U32: | ||
case AMDGPU::S_CMPK_LG_I32: | ||
return optimizeCmpAnd(0, 32, true, false); | ||
return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(); | ||
case AMDGPU::S_CMP_GT_U32: | ||
case AMDGPU::S_CMPK_GT_U32: | ||
return optimizeCmpAnd(0, 32, false, false); | ||
case AMDGPU::S_CMP_GT_I32: | ||
case AMDGPU::S_CMPK_GT_I32: | ||
return optimizeCmpAnd(0, 32, false, true); | ||
case AMDGPU::S_CMP_LG_U64: | ||
return optimizeCmpAnd(0, 64, true, false); | ||
return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(); | ||
} | ||
|
||
return false; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't see why the parent would matter, but this also looks untested
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Def and CmpInstr need to have the same parent so that the scan will work.