Skip to content

Commit

Permalink
[MachineScheduler][NFCI] Add Offset and OffsetIsScalable args to shou…
Browse files Browse the repository at this point in the history
…ldClusterMemOps (#73778)

These are picked up from getMemOperandsWithOffsetWidth but weren't then
being passed through to shouldClusterMemOps, which forces backends to
collect the information again if they want to use the kind of heuristics
typically used for the similar shouldScheduleLoadsNear function (e.g.
checking the offset is within 1 cache line).

This patch just adds the parameters, but doesn't attempt to use them.
There is potential to use them in the current PPC and AArch64
shouldClusterMemOps implementation, and I intend to use the offset in
the heuristic for RISC-V. I've left these for future patches in the
interest of being as incremental as possible.

As noted in the review and in an inline FIXME, an ElementCount-style abstraction may later be used to condense these two parameters to one argument. ElementCount isn't quite suitable as it doesn't support negative offsets.
  • Loading branch information
asb committed Dec 6, 2023
1 parent 10f7801 commit b717365
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 12 deletions.
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,8 @@ class TargetInstrInfo : public MCInstrInfo {
/// Get the base operand and byte offset of an instruction that reads/writes
/// memory. This is a convenience function for callers that are only prepared
/// to handle a single base operand.
/// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
/// abstraction that supports negative offsets.
bool getMemOperandWithOffset(const MachineInstr &MI,
const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable,
Expand All @@ -1427,6 +1429,8 @@ class TargetInstrInfo : public MCInstrInfo {
/// It returns false if base operands and offset could not be determined.
/// It is not guaranteed to always recognize base operands and offsets in all
/// cases.
/// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
/// abstraction that supports negative offsets.
virtual bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
Expand Down Expand Up @@ -1497,12 +1501,18 @@ class TargetInstrInfo : public MCInstrInfo {
/// to TargetPassConfig::createMachineScheduler() to have an effect.
///
/// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
/// \p Offset1 and \p Offset2 are the byte offsets for the memory
/// operations.
/// \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate if the offset is
/// scaled by a runtime quantity.
/// \p ClusterSize is the number of operations in the resulting load/store
/// cluster if this hook returns true.
/// \p NumBytes is the number of bytes that will be loaded from all the
/// clustered loads if this hook returns true.
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
Expand Down
14 changes: 9 additions & 5 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1698,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
unsigned Width;
bool OffsetIsScalable;

MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
int64_t Offset, unsigned Width)
int64_t Offset, bool OffsetIsScalable, unsigned Width)
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
Width(Width) {}
Width(Width), OffsetIsScalable(OffsetIsScalable) {}

static bool Compare(const MachineOperand *const &A,
const MachineOperand *const &B) {
Expand Down Expand Up @@ -1831,8 +1832,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
}

if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
CurrentClusterBytes))
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
MemOpa.OffsetIsScalable, MemOpb.BaseOps,
MemOpb.Offset, MemOpb.OffsetIsScalable,
ClusterLength, CurrentClusterBytes))
continue;

SUnit *SUa = MemOpa.SU;
Expand Down Expand Up @@ -1899,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
unsigned Width;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
OffsetIsScalable, Width, TRI)) {
MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
MemOpRecords.push_back(
MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));

LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
<< Offset << ", OffsetIsScalable: " << OffsetIsScalable
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4238,8 +4238,9 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
int64_t &MinOffset, int64_t &MaxOffset);

bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const override;

Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,10 @@ class SIInsertHardClauses : public MachineFunctionPass {
// scheduler it limits the size of the cluster to avoid increasing
// register pressure too much, but this pass runs after register
// allocation so there is no need for that kind of limit.
!SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
// We also lie about the Offset and OffsetIsScalable parameters,
// as they aren't used in the SIInstrInfo implementation.
!SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
2, 2)))) {
// Finish the current clause.
Changed |= emitClause(CI, SII);
CI = ClauseInfo();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
}

bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const {
// If the mem ops (to be clustered) do not have the same base ptr, then they
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const TargetRegisterInfo *TRI) const final;

bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const override;

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2877,8 +2877,9 @@ static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
}

bool PPCInstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {

assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
/// Returns true if the two given memory operations should be scheduled
/// adjacent.
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const override;

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2266,8 +2266,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
}

bool RISCVInstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
// If the mem ops (to be clustered) do not have the same base ptr, then they
// should not be clustered
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
const TargetRegisterInfo *TRI) const override;

bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const override;

Expand Down

0 comments on commit b717365

Please sign in to comment.