-
Notifications
You must be signed in to change notification settings - Fork 15.1k
CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions #166213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Nicolai Hähnle (nhaehnle) ChangesThis is in preparation for future changes in AMDGPU that will make more Stack:
Full diff: https://github.com/llvm/llvm-project/pull/166213.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1f816b94cf56b..d6d23061be16e 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
if (!NewMI)
return false;
- LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
-
- // If the old instruction is debug value tracked, an update is required.
- if (auto OldInstrNum = mi->peekDebugInstrNum()) {
- assert(mi->getNumExplicitDefs() == 1);
- assert(NewMI->getNumExplicitDefs() == 1);
-
- // Find the old and new def location.
- unsigned OldIdx = mi->defs().begin()->getOperandNo();
- unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
-
- // Record that one def has been replaced by the other.
- unsigned NewInstrNum = NewMI->getDebugInstrNum();
- MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
- std::make_pair(NewInstrNum, NewIdx));
- }
-
- MBB->erase(mi); // Nuke the old inst.
-
for (MachineInstr &MI : MIS)
DistanceMap.insert(std::make_pair(&MI, Dist++));
- Dist--;
+
+ if (&*mi == NewMI) {
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
+ } else {
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+
+ // If the old instruction is debug value tracked, an update is required.
+ if (auto OldInstrNum = mi->peekDebugInstrNum()) {
+ assert(mi->getNumExplicitDefs() == 1);
+ assert(NewMI->getNumExplicitDefs() == 1);
+
+ // Find the old and new def location.
+ unsigned OldIdx = mi->defs().begin()->getOperandNo();
+ unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
+
+ // Record that one def has been replaced by the other.
+ unsigned NewInstrNum = NewMI->getDebugInstrNum();
+ MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
+ std::make_pair(NewInstrNum, NewIdx));
+ }
+
+ MBB->erase(mi); // Nuke the old inst.
+ Dist--;
+ }
+
mi = NewMI;
nmi = std::next(mi);
@@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
+ // Give targets a chance to convert bundled instructions.
+ bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);
+
// If the instruction is convertible to 3 Addr, instead
// of returning try 3 Addr transformation aggressively and
// use this variable to check later. Because it might be better.
@@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
// addl %esi, %edi
// movl %edi, %eax
// ret
- if (Commuted && !MI.isConvertibleTo3Addr())
+ if (Commuted && !ConvertibleTo3Addr)
return false;
if (shouldOnlyCommute)
@@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
regBKilled = isKilled(MI, regB, true);
}
- if (MI.isConvertibleTo3Addr()) {
+ if (ConvertibleTo3Addr) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21c2d7f5..031ed90e0ad15 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4044,10 +4044,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineInstr *CandidateMI = &MI;
+
+ if (MI.isBundle()) {
+ // This is a temporary placeholder for bundle handling that enables us to
+ // exercise the relevant code paths in the two-address instruction pass.
+ if (MI.getBundleSize() != 1)
+ return nullptr;
+ CandidateMI = MI.getNextNode();
+ }
+
ThreeAddressUpdates U;
- MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
+ MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
+ if (!NewMI)
+ return nullptr;
- if (NewMI) {
+ if (MI.isBundle()) {
+ CandidateMI->eraseFromBundle();
+
+ for (MachineOperand &MO : MI.all_defs()) {
+ if (MO.isTied())
+ MI.untieRegOperand(MO.getOperandNo());
+ }
+ } else {
updateLiveVariables(LV, MI, *NewMI);
if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -4088,7 +4107,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(DefReg).AliveBlocks.clear();
}
- if (LIS) {
+ if (MI.isBundle()) {
+ VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
+ if (!VRI.Reads && !VRI.Writes) {
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (MO.isReg() && MO.getReg() == DefReg) {
+ MI.removeOperand(MO.getOperandNo());
+ break;
+ }
+ }
+
+ if (LIS)
+ LIS->shrinkToUses(&LIS->getInterval(DefReg));
+ }
+ } else if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
// We cannot delete the original instruction here, so hack out the use
@@ -4103,11 +4135,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
}
}
+ if (MI.isBundle()) {
+ VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
+ if (!VRI.Reads && !VRI.Writes) {
+ for (MachineOperand &MIOp : MI.uses()) {
+ if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+ MIOp.setIsUndef(true);
+ MIOp.setReg(DummyReg);
+ }
+ }
+ }
+
+ auto MO = MachineOperand::CreateReg(DummyReg, false);
+ MO.setIsUndef(true);
+ MI.addOperand(MO);
+ }
+
LIS->shrinkToUses(&DefLI);
}
}
- return NewMI;
+ return MI.isBundle() ? &MI : NewMI;
}
MachineInstr *
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
index 696962a88c8b8..8ae50d8e0e071 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
@@ -31,7 +31,7 @@ body: |
...
-# This test is an example where conversion to three-address form would be beneficial.
+# This test is an example where conversion to three-address form is beneficial.
---
name: test_fmac_reuse_bundle
body: |
@@ -41,11 +41,10 @@ body: |
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
+ ; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
+ ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: }
- ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
%2:vgpr_32 = COPY $vgpr0
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
4ce0ced to
909e8a0
Compare
282ab76 to
4af0353
Compare
909e8a0 to
582defb
Compare
| VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg); | ||
| if (!VRI.Reads && !VRI.Writes) { | ||
| for (MachineOperand &MO : MI.all_uses()) { | ||
| if (MO.isReg() && MO.getReg() == DefReg) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test case with subregister operand?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See the discussion with Jay in #166212 -- I looked into it and decided to just prevent and forbid tied sub-registers on bundles in pre-RA as the safer route due to the complexities involved.
I'm adding an assert to that effect here.
4af0353 to
97427fa
Compare
e0611e4 to
2ca173d
Compare
1224dba to
b65555b
Compare
69ff9c0 to
856bcd3
Compare
b65555b to
3eb0dc1
Compare
If the instruction with tied operands is a BUNDLE instruction and we handle it by replacing an operand, then we need to update the corresponding internal operands as well. Otherwise, the resulting MIR is invalid. The test case is degenerate in the sense that the bundle only contains a single instruction, but it is sufficient to exercise this issue. commit-id:6760a9b7
856bcd3 to
dd8c2ec
Compare
This is in preparation for future changes in AMDGPU that will make more substantial use of bundles pre-RA. For now, simply test this with degenerate (single-instruction) bundles. commit-id:4a30cb78
dd8c2ec to
cc06ca2
Compare
This is in preparation for future changes in AMDGPU that will make more
substantial use of bundles pre-RA. For now, simply test this with
degenerate (single-instruction) bundles.
Stack: