[AMDGPU] Simplify commuted operand handling. NFCI. #71965
Conversation
SIInstrInfo::commuteInstructionImpl should accept indices to commute in either order. This simplifies SIFoldOperands::tryAddToFoldList where OtherIdx, CommuteIdx0 and CommuteIdx1 are no longer needed.
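For readers less familiar with the commute hooks: TargetInstrInfo::findCommutedOpIndices lets a caller fix one operand index and pass TargetInstrInfo::CommuteAnyOperandIndex for the other, and the hook fills in the partner operand. The standalone toy below is not the LLVM implementation (the hard-wired positions 1 and 2 are purely illustrative); it only sketches why the new caller no longer needs the CommuteIdx0/CommuteIdx1 bookkeeping that this PR removes.

```cpp
#include <cassert>

// Toy stand-in for TargetInstrInfo::CommuteAnyOperandIndex.
constexpr unsigned AnyOperandIndex = ~0u;

// Toy findCommutedOpIndices: pretend the two commutable source operands
// always sit at positions 1 and 2. A wildcard index resolves to the partner
// of a fixed index; the query fails if the fixed index is not commutable.
bool findCommutedOpIndices(unsigned &Idx0, unsigned &Idx1) {
  if (Idx0 == AnyOperandIndex && Idx1 == AnyOperandIndex) {
    Idx0 = 1;
    Idx1 = 2;
    return true;
  }
  unsigned &Wild = (Idx0 == AnyOperandIndex) ? Idx0 : Idx1;
  unsigned Fixed = (Idx0 == AnyOperandIndex) ? Idx1 : Idx0;
  if (Fixed != 1 && Fixed != 2)
    return false;
  Wild = (Fixed == 1) ? 2 : 1;
  return true;
}

int main() {
  // Old caller pattern: two wildcards, then compare OpNo against both
  // returned indices to work out which one is the partner.
  unsigned CommuteIdx0 = AnyOperandIndex, CommuteIdx1 = AnyOperandIndex;
  bool CanCommute = findCommutedOpIndices(CommuteIdx0, CommuteIdx1);
  assert(CanCommute && CommuteIdx0 == 1 && CommuteIdx1 == 2);

  // New caller pattern: pass the operand of interest (OpNo) plus a wildcard,
  // and the hook hands back the partner index directly.
  unsigned OpNo = 2, CommuteOpNo = AnyOperandIndex;
  CanCommute = findCommutedOpIndices(OpNo, CommuteOpNo);
  assert(CanCommute && CommuteOpNo == 1);

  // A non-commutable operand simply makes the query fail.
  unsigned NotCommutable = 7, Other = AnyOperandIndex;
  assert(!findCommutedOpIndices(NotCommutable, Other));
  (void)CanCommute;
  return 0;
}
```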
@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

SIInstrInfo::commuteInstructionImpl should accept indices to commute in either order. This simplifies SIFoldOperands::tryAddToFoldList where OtherIdx, CommuteIdx0 and CommuteIdx1 are no longer needed.

Full diff: https://github.com/llvm/llvm-project/pull/71965.diff

2 Files Affected:
- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ebfa297f4fc339..3f7126c156c7365 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -429,63 +429,48 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
   if (isUseMIInFoldList(FoldList, MI))
     return false;
-  unsigned CommuteOpNo = OpNo;
-
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
-  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
-  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
-  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
-
-  if (CanCommute) {
-    if (CommuteIdx0 == OpNo)
-      CommuteOpNo = CommuteIdx1;
-    else if (CommuteIdx1 == OpNo)
-      CommuteOpNo = CommuteIdx0;
-  }
-
+  unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
+  bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);
+  if (!CanCommute)
+    return false;
   // One of operands might be an Imm operand, and OpNo may refer to it after
   // the call of commuteInstruction() below. Such situations are avoided
   // here explicitly as OpNo must be a register operand to be a candidate
   // for memory folding.
-  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
-                     !MI->getOperand(CommuteIdx1).isReg()))
+  if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
     return false;
-  if (!CanCommute ||
-      !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
+  if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))
     return false;
+  int Op32 = -1;
   if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
-    if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
-         Opc == AMDGPU::V_SUB_CO_U32_e64 ||
-         Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
-        (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
-
-      // Verify the other operand is a VGPR, otherwise we would violate the
-      // constant bus restriction.
-      unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
-      MachineOperand &OtherOp = MI->getOperand(OtherIdx);
-      if (!OtherOp.isReg() ||
-          !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
-        return false;
-
-      assert(MI->getOperand(1).isDef());
+    if ((Opc != AMDGPU::V_ADD_CO_U32_e64 &&
+         Opc != AMDGPU::V_SUB_CO_U32_e64 &&
+         Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
+        (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+      TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
+      return false;
+    }
-      // Make sure to get the 32-bit version of the commuted opcode.
-      unsigned MaybeCommutedOpc = MI->getOpcode();
-      int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+    // Verify the other operand is a VGPR, otherwise we would violate the
+    // constant bus restriction.
+    MachineOperand &OtherOp = MI->getOperand(OpNo);
+    if (!OtherOp.isReg() ||
+        !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
+      return false;
-      appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
-      return true;
-    }
+    assert(MI->getOperand(1).isDef());
-    TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
-    return false;
+    // Make sure to get the 32-bit version of the commuted opcode.
+    unsigned MaybeCommutedOpc = MI->getOpcode();
+    Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
   }
-  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
+  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
   return true;
 }
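One behavioural detail worth noting in the restructured SIFoldOperands.cpp hunk above: the instruction is commuted speculatively, and if the commuted form still cannot take the fold (and the V_ADD/V_SUB/V_SUBREV_CO_U32 special case does not apply), it is commuted a second time to restore the original operand order before returning false. A minimal standalone sketch of that commute-and-roll-back idea, with hypothetical names (Inst, isLegalAfter, trySpeculativeCommute are not LLVM APIs):

```cpp
#include <cstdio>
#include <utility>

// Toy "instruction" with two commutable source operands.
struct Inst {
  int Src0, Src1;
};

// Swapping the sources twice restores the original state.
void commute(Inst &I) { std::swap(I.Src0, I.Src1); }

// Hypothetical legality check standing in for TII->isOperandLegal().
bool isLegalAfter(const Inst &I) { return I.Src0 >= 0; }

// Commute speculatively; roll the commute back if it does not help, so the
// caller always sees the instruction unchanged on failure.
bool trySpeculativeCommute(Inst &I) {
  commute(I);
  if (!isLegalAfter(I)) {
    commute(I); // undo
    return false;
  }
  return true;
}

int main() {
  Inst A{-1, 5};
  std::printf("folded after commute: %d\n", trySpeculativeCommute(A)); // 1

  Inst B{-1, -2};
  bool Folded = trySpeculativeCommute(B);
  std::printf("folded: %d, Src0 restored to %d\n", Folded, B.Src0); // 0, -1
  return 0;
}
```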
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b11fd5f757cbe0..027b695c3bb1a74 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2692,6 +2692,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   if (CommutedOpcode == -1)
     return nullptr;
+  if (Src0Idx > Src1Idx)
+    std::swap(Src0Idx, Src1Idx);
+
   assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
              static_cast<int>(Src0Idx) &&
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
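The SIInstrInfo.cpp hunk is what makes the caller-side simplification safe: commuteInstructionImpl now canonicalizes the incoming index pair itself, so callers may pass the two indices in either order. A minimal standalone sketch of that normalization (illustrative, not the LLVM source; canonicalizeSrcIndices is a made-up name):

```cpp
#include <cassert>
#include <utility>

// Canonicalize a pair of operand indices so that downstream code can assume
// the first index refers to src0 and the second to src1, mirroring the
// std::swap added to SIInstrInfo::commuteInstructionImpl above.
std::pair<unsigned, unsigned> canonicalizeSrcIndices(unsigned Src0Idx,
                                                     unsigned Src1Idx) {
  if (Src0Idx > Src1Idx)
    std::swap(Src0Idx, Src1Idx);
  return {Src0Idx, Src1Idx};
}

int main() {
  // Either argument order yields the same canonical pair.
  assert(canonicalizeSrcIndices(1, 2) == std::make_pair(1u, 2u));
  assert(canonicalizeSrcIndices(2, 1) == std::make_pair(1u, 2u));
  return 0;
}
```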
✅ With the latest revision this PR passed the C/C++ code formatter.
LGTM