-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Invert scc uses to delete s_cmp_eq* #167382
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fa8aded
7cac738
781e611
139b622
3bf05ff
65ccbf4
35c3de8
adc2b32
d01205f
729b232
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10737,12 +10737,62 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, | |
| return false; | ||
| } | ||
|
|
||
| // Invert all uses of SCC following SCCDef because SCCDef may be deleted and | ||
| // (incoming SCC) = !(SCC defined by SCCDef). | ||
| // Return true if all uses can be re-written, false otherwise. | ||
| bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const { | ||
| MachineBasicBlock *MBB = SCCDef->getParent(); | ||
| SmallVector<MachineInstr *> InvertInstr; | ||
| bool SCCIsDead = false; | ||
|
|
||
| // Scan instructions for SCC uses that need to be inverted until SCC is dead. | ||
| for (MachineInstr &MI : | ||
| make_range(std::next(MachineBasicBlock::iterator(SCCDef)), MBB->end())) { | ||
| if (MI.readsRegister(AMDGPU::SCC, &RI)) { | ||
| if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 || | ||
| MI.getOpcode() == AMDGPU::S_CSELECT_B64 || | ||
| MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 || | ||
| MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1) | ||
| InvertInstr.push_back(&MI); | ||
| else | ||
| return false; | ||
| } | ||
| if (MI.definesRegister(AMDGPU::SCC, &RI) || | ||
| MI.killsRegister(AMDGPU::SCC, &RI)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I meant: check the dead flag on the operand, not the kill flags.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Isn't a dead SCC operand already covered by definesRegister(AMDGPU::SCC)? A dead operand is an unused definition.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, a dead register is a special case of a defined register: one where it is known that the value has no use. I want to avoid introducing new uses of kill flags, which are soft-deprecated. Dead is the opposite of kill.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
So does the dead flag mean that a register is not live after an instruction? Can it be set for operands that are only used, and not defined, by the instruction? Looking at https://llvm.org/docs/MIRLangRef.html#register-flags, dead is an "Unused definition", which is different from "not live after an instruction". |
||
| SCCIsDead = true; | ||
| break; | ||
| } | ||
| } | ||
| if (MBB->succ_empty()) | ||
| SCCIsDead = true; | ||
|
|
||
| // SCC may have more uses. Can't invert all of them. | ||
| if (!SCCIsDead) | ||
| return false; | ||
|
|
||
| // Invert uses | ||
| for (MachineInstr *MI : InvertInstr) { | ||
| if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 || | ||
| MI->getOpcode() == AMDGPU::S_CSELECT_B64) { | ||
| swapOperands(*MI); | ||
| } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 || | ||
| MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) { | ||
| MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 | ||
| ? AMDGPU::S_CBRANCH_SCC1 | ||
| : AMDGPU::S_CBRANCH_SCC0)); | ||
LU-JOHN marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else { | ||
| llvm_unreachable("SCC used but no inversion handling"); | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| // SCC is already valid after SCCValid. | ||
| // SCCRedefine will redefine SCC to the same value already available after | ||
| // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and | ||
| // update kill/dead flags if necessary. | ||
| static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, | ||
| const SIRegisterInfo &RI) { | ||
| bool SIInstrInfo::optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, | ||
| bool NeedInversion) const { | ||
| MachineInstr *KillsSCC = nullptr; | ||
| if (SCCValid->getParent() != SCCRedefine->getParent()) | ||
| return false; | ||
|
|
@@ -10753,6 +10803,8 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, | |
| if (MI.killsRegister(AMDGPU::SCC, &RI)) | ||
| KillsSCC = &MI; | ||
| } | ||
| if (NeedInversion && !invertSCCUse(SCCRedefine)) | ||
| return false; | ||
| if (MachineOperand *SccDef = | ||
| SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)) | ||
| SccDef->setIsDead(false); | ||
|
|
@@ -10786,7 +10838,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| return false; | ||
|
|
||
| const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, | ||
| this]() -> bool { | ||
| this](bool NeedInversion) -> bool { | ||
| if (CmpValue != 0) | ||
| return false; | ||
|
|
||
|
|
@@ -10807,7 +10859,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def)) | ||
| return false; | ||
|
|
||
| if (!optimizeSCC(Def, &CmpInstr, RI)) | ||
| if (!optimizeSCC(Def, &CmpInstr, NeedInversion)) | ||
| return false; | ||
|
|
||
| // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit | ||
|
|
@@ -10832,7 +10884,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| Def1->getOperand(1).getReg() == Def2->getOperand(1).getReg()) { | ||
| MachineInstr *Select = MRI->getVRegDef(Def1->getOperand(1).getReg()); | ||
| if (Select && foldableSelect(*Select)) | ||
| optimizeSCC(Select, Def, RI); | ||
| optimizeSCC(Select, Def, false); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -10913,7 +10965,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) | ||
| return false; | ||
|
|
||
| if (!optimizeSCC(Def, &CmpInstr, RI)) | ||
| if (!optimizeSCC(Def, &CmpInstr, false)) | ||
| return false; | ||
|
|
||
| if (!MRI->use_nodbg_empty(DefReg)) { | ||
|
|
@@ -10944,7 +10996,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| case AMDGPU::S_CMP_EQ_I32: | ||
| case AMDGPU::S_CMPK_EQ_U32: | ||
| case AMDGPU::S_CMPK_EQ_I32: | ||
| return optimizeCmpAnd(1, 32, true, false); | ||
| return optimizeCmpAnd(1, 32, true, false) || optimizeCmpSelect(true); | ||
| case AMDGPU::S_CMP_GE_U32: | ||
| case AMDGPU::S_CMPK_GE_U32: | ||
| return optimizeCmpAnd(1, 32, false, false); | ||
|
|
@@ -10957,15 +11009,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |
| case AMDGPU::S_CMP_LG_I32: | ||
| case AMDGPU::S_CMPK_LG_U32: | ||
| case AMDGPU::S_CMPK_LG_I32: | ||
| return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(); | ||
| return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(false); | ||
| case AMDGPU::S_CMP_GT_U32: | ||
| case AMDGPU::S_CMPK_GT_U32: | ||
| return optimizeCmpAnd(0, 32, false, false); | ||
| case AMDGPU::S_CMP_GT_I32: | ||
| case AMDGPU::S_CMPK_GT_I32: | ||
| return optimizeCmpAnd(0, 32, false, true); | ||
| case AMDGPU::S_CMP_LG_U64: | ||
| return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(); | ||
| return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(false); | ||
| } | ||
|
|
||
| return false; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.