Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
namespace llvm {

class PSetIterator;
class VirtRegMap;

/// Convenient type to represent either a register class or a register bank.
using RegClassOrRegBank =
Expand Down Expand Up @@ -107,6 +108,12 @@ class MachineRegisterInfo {
VirtReg2IndexFunctor>
RegAllocHints;

/// AntiHintRegs - This vector records register anti-hints for
/// virtual registers. For each virtual register, it keeps a vector of virtual
/// registers that should NOT be allocated to the same or overlapping physical
/// registers.
IndexedMap<SmallVector<Register, 4>, VirtReg2IndexFunctor> AntiHintRegs;

/// PhysRegUseDefLists - This is an array of the head of the use/def list for
/// physical registers.
std::unique_ptr<MachineOperand *[]> PhysRegUseDefLists;
Expand Down Expand Up @@ -860,6 +867,56 @@ class MachineRegisterInfo {
return RegAllocHints.inBounds(VReg) ? &RegAllocHints[VReg] : nullptr;
}

/// addRegAllocAntiHint - Add a register allocation anti-hint for the
/// specified virtual register. This tells the allocator to avoid allocating
/// VReg to the same physical register as AntiHintVReg (or overlapping ones).
void addRegAllocAntiHint(Register VReg, Register AntiHintVReg) {
assert(VReg.isVirtual() && "Anti-hints are only for virtual registers");
assert(AntiHintVReg.isVirtual() && "Anti-hint target must be virtual");
AntiHintRegs.grow(VReg);
SmallVector<Register, 4> &AntiHints = AntiHintRegs[VReg];
// Avoid duplicates
if (llvm::find(AntiHints, AntiHintVReg) == AntiHints.end())
AntiHints.push_back(AntiHintVReg);
}

/// addRegAllocationAntiHint - Add multiple anti-hints at once.
void addRegAllocationAntiHints(Register VReg,
ArrayRef<Register> AntiHintVRegs) {
for (Register AntiHint : AntiHintVRegs)
addRegAllocAntiHint(VReg, AntiHint);
}

/// clearRegAllocationAntiHints - Clear all anti-hints for a register.
void clearRegAllocationAntiHints(Register VReg) {
assert(VReg.isVirtual());
if (AntiHintRegs.inBounds(VReg))
AntiHintRegs[VReg].clear();
}

/// getRegAllocationAntiHints - Return the vector of anti-hints for VReg.
ArrayRef<Register> getRegAllocationAntiHints(Register VReg) const {
assert(VReg.isVirtual());
if (!AntiHintRegs.inBounds(VReg))
return ArrayRef<Register>();
return AntiHintRegs[VReg];
}

/// hasRegAllocationAntiHint - Check if VReg has AntiHintVReg as an anti-hint.
bool hasRegAllocationAntiHint(Register VReg, Register AntiHintVReg) const {
assert(VReg.isVirtual() && AntiHintVReg.isVirtual());
if (!AntiHintRegs.inBounds(VReg))
return false;
const SmallVector<Register, 4> &AntiHints = AntiHintRegs[VReg];
return llvm::find(AntiHints, AntiHintVReg) != AntiHints.end();
}

/// getPhysRegAntiHints - Get the set of physical registers to avoid.
/// VRM is the current virtual register map showing allocations made so far.
void getPhysRegAntiHints(Register VReg,
SmallVectorImpl<MCPhysReg> &PhysAntiHints,
const VirtRegMap &VRM) const;

/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
Expand Down
72 changes: 69 additions & 3 deletions llvm/lib/CodeGen/AllocationOrder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ AllocationOrder AllocationOrder::create(Register VirtReg, const VirtRegMap &VRM,
const LiveRegMatrix *Matrix) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
SmallVector<MCPhysReg, 16> Hints;
bool HardHints =
Expand All @@ -44,8 +45,73 @@ AllocationOrder AllocationOrder::create(Register VirtReg, const VirtRegMap &VRM,
dbgs() << '\n';
}
});
assert(all_of(Hints,
[&](MCPhysReg Hint) { return is_contained(Order, Hint); }) &&

// Get anti-hints
SmallVector<MCPhysReg, 16> AntiHintedPhysRegs;
MRI.getPhysRegAntiHints(VirtReg, AntiHintedPhysRegs, VRM);

LLVM_DEBUG({
if (!AntiHintedPhysRegs.empty()) {
dbgs() << "anti-hints:";
for (MCPhysReg AntiHint : AntiHintedPhysRegs)
dbgs() << ' ' << printReg(AntiHint, TRI);
dbgs() << '\n';
}
});

// Create allocation order object
AllocationOrder AO(std::move(Hints), Order, HardHints);

// Apply anti-hints filtering if needed
if (!AntiHintedPhysRegs.empty()) {
AO.applyAntiHints(AntiHintedPhysRegs, TRI);

LLVM_DEBUG({
if (!AO.Hints.empty()) {
dbgs() << "filtered hints:";
for (MCPhysReg Hint : AO.Hints)
dbgs() << ' ' << printReg(Hint, TRI);
dbgs() << '\n';
}
});
}

assert(all_of(AO.Hints,
[&](MCPhysReg Hint) { return is_contained(AO.Order, Hint); }) &&
"Target hint is outside allocation order.");
return AllocationOrder(std::move(Hints), Order, HardHints);
return AO;
}

void AllocationOrder::applyAntiHints(ArrayRef<MCPhysReg> AntiHintedPhysRegs,
const TargetRegisterInfo *TRI) {
// Helper to check if a register overlaps with any anti-hint
auto isAntiHinted = [&](MCPhysReg Reg) {
return std::any_of(
AntiHintedPhysRegs.begin(), AntiHintedPhysRegs.end(),
[&](MCPhysReg AntiHint) { return TRI->regsOverlap(Reg, AntiHint); });
};

// Create filtered order
FilteredOrderStorage.clear();
FilteredOrderStorage.assign(Order.begin(), Order.end());

// Partition: non-anti-hinted registers go first
auto PartitionPoint = std::stable_partition(
FilteredOrderStorage.begin(), FilteredOrderStorage.end(),
[&](MCPhysReg Reg) { return !isAntiHinted(Reg); });

// Update Order
Order = FilteredOrderStorage;

LLVM_DEBUG({
size_t NonAntiHintedCount =
std::distance(FilteredOrderStorage.begin(), PartitionPoint);
size_t AntiHintedCount =
std::distance(PartitionPoint, FilteredOrderStorage.end());
dbgs() << " Added " << NonAntiHintedCount
<< " non-anti-hinted registers first\n"
<< " Added " << AntiHintedCount
<< " anti-hinted registers at the end\n"
<< " Anti-hint filtering complete\n";
});
}
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/AllocationOrder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

namespace llvm {

Expand All @@ -29,6 +30,9 @@ class LiveRegMatrix;

class LLVM_LIBRARY_VISIBILITY AllocationOrder {
const SmallVector<MCPhysReg, 16> Hints;
// Used as storage if the Order received in the constructor needs to be
// altered.
SmallVector<MCPhysReg, 16> FilteredOrderStorage;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SmallVector<MCPhysReg, 16> FilteredOrderStorage;
// Used as storage if the Order received in the constructor needs to be altered.
SmallVector<MCPhysReg, 16> FilteredOrderStorage;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed it. Please check.

ArrayRef<MCPhysReg> Order;
// How far into the Order we can iterate. This is 0 if the AllocationOrder is
// constructed with HardHints = true, Order.size() otherwise. While
Expand Down Expand Up @@ -117,6 +121,10 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder {
static_cast<uint32_t>(std::numeric_limits<MCPhysReg>::max()));
return Reg.isPhysical() && is_contained(Hints, Reg.id());
}

/// Apply anti-hints to the allocation order.
void applyAntiHints(ArrayRef<MCPhysReg> AntiHintedPhysRegs,
const TargetRegisterInfo *TRI);
};

} // end namespace llvm
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
Expand Down Expand Up @@ -674,3 +677,26 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}

void MachineRegisterInfo::getPhysRegAntiHints(
Register VReg, SmallVectorImpl<MCPhysReg> &PhysAntiHints,
const VirtRegMap &VRM) const {
assert(VReg.isVirtual());
if (!AntiHintRegs.inBounds(VReg))
return;

const SmallVector<Register, 4> &AntiHints = AntiHintRegs[VReg];

for (Register AntiHintVReg : AntiHints) {
// Check if the anti-hinted register has been allocated
if (VRM.hasPhys(AntiHintVReg)) {
MCPhysReg PhysReg = VRM.getPhys(AntiHintVReg);
// Add the physical register
PhysAntiHints.push_back(PhysReg);
}
}

// Remove duplicates
llvm::sort(PhysAntiHints);
PhysAntiHints.erase(llvm::unique(PhysAntiHints), PhysAntiHints.end());
}
89 changes: 89 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,19 @@
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

static cl::opt<bool>
EnableAntiHintsForMFMARegs("amdgpu-anti-hints-for-mfma", cl::Hidden,
cl::desc("Enable Anti-Hints for "
"MFMA in GCNPreRAOptimizations stage."),
cl::init(true));

namespace {

class GCNPreRAOptimizationsImpl {
Expand Down Expand Up @@ -247,6 +254,88 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
TRI = ST.getRegisterInfo();

bool Changed = false;
// Add RA anti-hints to reduce MFMA hazard NOPs
if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts()) {
// Max lookback window for RAW or WAW hazard
constexpr unsigned MaxLookbackWindow = 19;
for (const MachineBasicBlock &MBB : MF) {
SmallVector<SmallVector<Register, 4>, 16> RecentMFMAs;
for (const MachineInstr &MI : MBB) {
if (MI.isDebugInstr())
continue;

// Handle MFMA instructions
if (SIInstrInfo::isMFMA(MI)) {
SmallVector<Register, 4> MFMARegisters;
// Helper to get named operand
auto collectNamedOperand = [&](AMDGPU::OpName OpName,
const char *OpNameStr) {
const MachineOperand *MO = TII->getNamedOperand(MI, OpName);
if (!MO) {
LLVM_DEBUG(dbgs() << " Named operand " << OpNameStr
<< " not found\n");
return;
}
if (MO->isReg() && MO->getReg().isVirtual()) {
Register Reg = MO->getReg();
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
// Only consider VGPRs
if (TRI->hasVGPRs(RC))
MFMARegisters.push_back(Reg);
LLVM_DEBUG(dbgs() << " Collected " << OpNameStr << " : "
<< printReg(Reg, TRI) << "\n");
}
};

// Collect destination and source C registers
collectNamedOperand(AMDGPU::OpName::vdst, "vdst"); // Destination
collectNamedOperand(AMDGPU::OpName::src2,
"src2"); // Matrix C (accumulator)
if (!MFMARegisters.empty()) {
RecentMFMAs.emplace_back(std::move(MFMARegisters));
// Maintain window
if (RecentMFMAs.size() > MaxLookbackWindow)
RecentMFMAs.erase(RecentMFMAs.begin());
}
continue;
}
bool ShouldCheckReuse = MI.mayLoad() || MI.mayStore() || MI.isCopy() ||
SIInstrInfo::isVALU(MI);
// Skip non-relevant instructions, or skip until at least one MFMA is
// encountered
if (!ShouldCheckReuse || RecentMFMAs.empty())
continue;

// Process operands that might reuse MFMA registers
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;

const Register CandidateReg = MO.getReg();
const TargetRegisterClass *CandidateRC =
MRI->getRegClass(CandidateReg);

// Only process VGPR registers
if (!TRI->isVGPRClass(CandidateRC))
continue;
for (auto It = RecentMFMAs.rbegin(); It != RecentMFMAs.rend(); ++It) {
const SmallVector<Register, 4> &MFMARegs = *It;
for (Register MFMAReg : MFMARegs) {
// Check if MFMA register is dead at current instruction
const LiveInterval &MFMAInterval = LIS->getInterval(MFMAReg);
const SlotIndex CurrentSlot =
LIS->getInstructionIndex(MI).getRegSlot();
if (!MFMAInterval.liveAt(CurrentSlot)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you storing the SlotIndex for all the MFMAs, if you're only interested in the CurrentSlot?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed it. Please check.

// Add bi-directional anti-hints
MRI->addRegAllocationAntiHints(CandidateReg, MFMAReg);
MRI->addRegAllocationAntiHints(MFMAReg, CandidateReg);
}
}
}
}
}
}
}

for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
Register Reg = Register::index2VirtReg(I);
Expand Down
Loading