Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 42 additions & 23 deletions llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
if (!NewMI)
return false;

LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);

// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
assert(mi->getNumExplicitDefs() == 1);
assert(NewMI->getNumExplicitDefs() == 1);

// Find the old and new def location.
unsigned OldIdx = mi->defs().begin()->getOperandNo();
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();

// Record that one def has been replaced by the other.
unsigned NewInstrNum = NewMI->getDebugInstrNum();
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
std::make_pair(NewInstrNum, NewIdx));
}

MBB->erase(mi); // Nuke the old inst.

for (MachineInstr &MI : MIS)
DistanceMap.insert(std::make_pair(&MI, Dist++));
Dist--;

if (&*mi == NewMI) {
LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
} else {
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);

// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
assert(mi->getNumExplicitDefs() == 1);
assert(NewMI->getNumExplicitDefs() == 1);

// Find the old and new def location.
unsigned OldIdx = mi->defs().begin()->getOperandNo();
unsigned NewIdx = NewMI->defs().begin()->getOperandNo();

// Record that one def has been replaced by the other.
unsigned NewInstrNum = NewMI->getDebugInstrNum();
MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
std::make_pair(NewInstrNum, NewIdx));
}

MBB->erase(mi); // Nuke the old inst.
Dist--;
}

mi = NewMI;
nmi = std::next(mi);

Expand Down Expand Up @@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(

bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);

// Give targets a chance to convert bundled instructions.
bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);

// If the instruction is convertible to 3 Addr, do not return here; instead
// try the 3 Addr transformation aggressively and use this variable to check
// later, because the result might be better.
Expand All @@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
// addl %esi, %edi
// movl %edi, %eax
// ret
if (Commuted && !MI.isConvertibleTo3Addr())
if (Commuted && !ConvertibleTo3Addr)
return false;

if (shouldOnlyCommute)
Expand All @@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
regBKilled = isKilled(MI, regB, true);
}

if (MI.isConvertibleTo3Addr()) {
if (ConvertibleTo3Addr) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
Expand Down Expand Up @@ -1665,6 +1673,17 @@ void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);

// Update uses of RegB to uses of RegA inside the bundle.
if (MI->isBundle()) {
for (MachineOperand &MO : mi_bundle_ops(*MI)) {
if (MO.isReg() && MO.getReg() == RegB) {
assert(MO.getSubReg() == 0 && SubRegB == 0 &&
"tied subregister uses in bundled instructions not supported");
MO.setReg(RegA);
}
}
}
}

if (AllUsesCopied) {
Expand Down
58 changes: 54 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4047,10 +4047,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineInstr *CandidateMI = &MI;

if (MI.isBundle()) {
// This is a temporary placeholder for bundle handling that enables us to
// exercise the relevant code paths in the two-address instruction pass.
if (MI.getBundleSize() != 1)
return nullptr;
CandidateMI = MI.getNextNode();
}

ThreeAddressUpdates U;
MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
if (!NewMI)
return nullptr;

if (NewMI) {
if (MI.isBundle()) {
CandidateMI->eraseFromBundle();

for (MachineOperand &MO : MI.all_defs()) {
if (MO.isTied())
MI.untieRegOperand(MO.getOperandNo());
}
} else {
updateLiveVariables(LV, MI, *NewMI);
if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
Expand Down Expand Up @@ -4091,7 +4110,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(DefReg).AliveBlocks.clear();
}

if (LIS) {
if (MI.isBundle()) {
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
if (!VRI.Reads && !VRI.Writes) {
for (MachineOperand &MO : MI.all_uses()) {
if (MO.isReg() && MO.getReg() == DefReg) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test case with subregister operand?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the discussion with Jay in #166212 -- I looked into it and decided that the safer route, given the complexities involved, is to forbid tied sub-registers on bundles pre-RA.

I'm adding an assert to that effect here.

assert(MO.getSubReg() == 0 &&
"tied sub-registers in bundles currently not supported");
MI.removeOperand(MO.getOperandNo());
break;
}
}

if (LIS)
LIS->shrinkToUses(&LIS->getInterval(DefReg));
}
} else if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);

// We cannot delete the original instruction here, so hack out the use
Expand All @@ -4106,11 +4140,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
}
}

if (MI.isBundle()) {
VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
if (!VRI.Reads && !VRI.Writes) {
for (MachineOperand &MIOp : MI.uses()) {
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
MIOp.setIsUndef(true);
MIOp.setReg(DummyReg);
}
}
}

auto MO = MachineOperand::CreateReg(DummyReg, false);
MO.setIsUndef(true);
MI.addOperand(MO);
}

LIS->shrinkToUses(&DefLI);
}
}

return NewMI;
return MI.isBundle() ? &MI : NewMI;
}

MachineInstr *
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GCN %s

# Exercise very basic handling of BUNDLE'd instructions by the two-address-instruction pass.

# This test is an example where it is best to keep the two-address instruction
# and resolve the tie with a COPY that is expected to be coalesced.
---
name: test_fmac_bundle
body: |
bb.0:

; GCN-LABEL: name: test_fmac_bundle
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
; GCN-NEXT: BUNDLE implicit-def [[COPY2]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY2]](tied-def 0), implicit $mode, implicit $exec {
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY2]], implicit $mode, implicit $exec
; GCN-NEXT: }
%10:vgpr_32 = COPY $vgpr0
%11:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_ADD_U32_e64 %10, %11, 0, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit killed %2(tied-def 0), implicit $mode, implicit $exec {
%3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit $mode, implicit $exec
}

...

# This test is an example where conversion to three-address form is beneficial:
# the tied source %2 is read again after the bundle (by the V_ADD_U32_e64).
---
name: test_fmac_reuse_bundle
body: |
bb.0:

; GCN-LABEL: name: test_fmac_reuse_bundle
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: }
; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
%2:vgpr_32 = COPY $vgpr0
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit %2(tied-def 0), implicit $mode, implicit $exec {
%3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit $mode, implicit $exec
}
%4:vgpr_32 = V_ADD_U32_e64 %3, %2, 0, implicit $exec

...
Loading