diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index dfd27aa91e9db..2bbe430dc68d9 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1415,6 +1415,8 @@ class TargetInstrInfo : public MCInstrInfo {
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory. This is a convenience function for callers that are only prepared
   /// to handle a single base operand.
+  /// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
+  /// abstraction that supports negative offsets.
   bool getMemOperandWithOffset(const MachineInstr &MI,
                                const MachineOperand *&BaseOp, int64_t &Offset,
                                bool &OffsetIsScalable,
@@ -1427,6 +1429,8 @@ class TargetInstrInfo : public MCInstrInfo {
   /// It returns false if base operands and offset could not be determined.
   /// It is not guaranteed to always recognize base operands and offsets in all
   /// cases.
+  /// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
+  /// abstraction that supports negative offsets.
   virtual bool getMemOperandsWithOffsetWidth(
       const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
       int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -1497,12 +1501,18 @@ class TargetInstrInfo : public MCInstrInfo {
   /// to TargetPassConfig::createMachineScheduler() to have an effect.
   ///
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+  /// \p Offset1 and \p Offset2 are the byte offsets for the memory
+  /// operations.
+  /// \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate if the offset is
+  /// scaled by a runtime quantity.
   /// \p ClusterSize is the number of operations in the resulting load/store
   /// cluster if this hook returns true.
   /// \p NumBytes is the number of bytes that will be loaded from all the
   /// clustered loads if this hook returns true.
   virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                                   int64_t Offset1, bool OffsetIsScalable1,
                                    ArrayRef<const MachineOperand *> BaseOps2,
+                                   int64_t Offset2, bool OffsetIsScalable2,
                                    unsigned ClusterSize,
                                    unsigned NumBytes) const {
     llvm_unreachable("target did not implement shouldClusterMemOps()");
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4add33ba0996a..c51ef33bfe54a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1698,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
     SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;
     unsigned Width;
+    bool OffsetIsScalable;
 
     MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
-              int64_t Offset, unsigned Width)
+              int64_t Offset, bool OffsetIsScalable, unsigned Width)
         : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
-          Width(Width) {}
+          Width(Width), OffsetIsScalable(OffsetIsScalable) {}
 
     static bool Compare(const MachineOperand *const &A,
                         const MachineOperand *const &B) {
@@ -1831,8 +1832,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
           SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
     }
 
-    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
-                                  CurrentClusterBytes))
+    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
+                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,
+                                  MemOpb.Offset, MemOpb.OffsetIsScalable,
+                                  ClusterLength, CurrentClusterBytes))
       continue;
 
     SUnit *SUa = MemOpa.SU;
@@ -1899,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
       unsigned Width;
       if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
                                              OffsetIsScalable, Width, TRI)) {
-        MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+        MemOpRecords.push_back(
+            MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
 
         LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
                           << Offset << ", OffsetIsScalable: " << OffsetIsScalable
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6f4c6f5ad073d..93b8295f4f3ef 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4238,8 +4238,9 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
 bool AArch64InstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
   const MachineOperand &BaseOp1 = *BaseOps1.front();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index b259efb9f2e77..e97ff0a9758d6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -179,7 +179,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
                                      int64_t &MinOffset, int64_t &MaxOffset);
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 50f8ad4433c6d..442ae4dd7b34f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -232,7 +232,10 @@ class SIInsertHardClauses : public MachineFunctionPass {
               // scheduler it limits the size of the cluster to avoid increasing
               // register pressure too much, but this pass runs after register
               // allocation so there is no need for that kind of limit.
-              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
+              // We also lie about the Offset and OffsetIsScalable parameters,
+              // as they aren't used in the SIInstrInfo implementation.
+              !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
+                                        2, 2)))) {
         // Finish the current clause.
         Changed |= emitClause(CI, SII);
         CI = ClauseInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b5b456d691254..0a06fa88b6b10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -541,7 +541,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 }
 
 bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                                      int64_t Offset1, bool OffsetIsScalable1,
                                       ArrayRef<const MachineOperand *> BaseOps2,
+                                      int64_t Offset2, bool OffsetIsScalable2,
                                       unsigned ClusterSize,
                                       unsigned NumBytes) const {
   // If the mem ops (to be clustered) do not have the same base ptr, then they
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e388b5550cb10..0ce31ac6d54ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -234,7 +234,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                                    const TargetRegisterInfo *TRI) const final;
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 49d003db8ffc9..21cd27d627113 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2877,8 +2877,9 @@ static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
 }
 
 bool PPCInstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index a8dc7d6d0e37a..2f0b9498411ae 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -532,7 +532,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   /// Returns true if the two given memory operations should be scheduled
   /// adjacent.
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8d79fc44a208e..1dcff7eb563e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2266,8 +2266,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 }
 
 bool RISCVInstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
   // If the mem ops (to be clustered) do not have the same base ptr, then they
   // should not be clustered
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0954286a419bd..7e1d3f3118065 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -158,7 +158,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
                             const TargetRegisterInfo *TRI) const override;
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
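
Note for out-of-tree targets: any override of shouldClusterMemOps must be updated to the widened signature. The sketch below shows one way a backend could consume the new parameters; "MyTargetInstrInfo" and its pair-within-16-bytes heuristic are invented for illustration and are not part of this patch or of any in-tree implementation.

// Sketch only: hypothetical out-of-tree backend using the new parameters.
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

class MyTargetInstrInfo : public TargetInstrInfo {
public:
  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override {
    // Handle only the single-base-operand case, as the in-tree overrides do.
    if (BaseOps1.size() != 1 || BaseOps2.size() != 1)
      return false;
    // A scalable offset is scaled by a runtime quantity (e.g. vscale), so
    // comparing it against a fixed offset by subtraction is meaningless.
    if (OffsetIsScalable1 != OffsetIsScalable2)
      return false;
    // Invented heuristic: cluster at most a pair, and only when the two
    // accesses begin within 16 bytes of each other.
    int64_t Dist = Offset1 - Offset2;
    if (Dist < 0)
      Dist = -Dist;
    return ClusterSize <= 2 && Dist <= 16;
  }
};

Rejecting a mixed scalable/unscalable pair up front mirrors the intent of the OffsetIsScalable parameters: the byte offsets are only directly comparable when both are scaled the same way.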