diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 6a624a7052cdd..6589f85937724 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1848,6 +1848,13 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
                     const MachineInstr &DefMI, unsigned DefIdx,
                     const MachineInstr &UseMI, unsigned UseIdx) const;
 
+  /// Compute the latency of a register data dependence (DefIdx -> UseIdx)
+  /// using the TargetSchedModel.
+  virtual std::optional<unsigned>
+  getOperandLatency(const TargetSchedModel &SchedModel,
+                    const MachineInstr *DefMI, unsigned DefIdx,
+                    const MachineInstr *UseMI, unsigned UseIdx) const;
+
   /// Compute the instruction latency of a given instruction.
   /// If the instruction has higher cost when predicated, it's returned via
   /// PredCost.
@@ -1855,6 +1862,11 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
                                    const MachineInstr &MI,
                                    unsigned *PredCost = nullptr) const;
 
+  /// Compute the instruction latency using the TargetSchedModel.
+  virtual std::optional<unsigned>
+  getInstrLatency(const TargetSchedModel &TargetSchedModel,
+                  const MachineInstr &MI) const;
+
   virtual unsigned getPredicationCost(const MachineInstr &MI) const;
 
   virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index 74f8ed5a18d33..b033ed65aed1a 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -44,8 +44,6 @@ class TargetSchedModel {
   // Resource units per cycle. Latency normalization factor.
   unsigned ResourceLCM = 0;
 
-  unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
-
   // EnableSchedModel and EnableSchedItins are used to control whether or not to
   // use the Target's {SchedMachineModel, InstrItins} for hardware infor based
   // Scheduling decisions. If both are enabled, as is the default, preference
@@ -203,6 +201,7 @@ class TargetSchedModel {
                                         bool UseDefaultDefLatency = true) const;
   LLVM_ABI unsigned computeInstrLatency(const MCInst &Inst) const;
   LLVM_ABI unsigned computeInstrLatency(unsigned Opcode) const;
+  LLVM_ABI unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
 
   /// Output dependency latency of a pair of defs of the same register.
   ///
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index b0009560d3fcb..70c9d86e29471 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1830,6 +1830,85 @@ unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   return ItinData->getStageLatency(MI.getDesc().getSchedClass());
 }
 
+std::optional<unsigned>
+TargetInstrInfo::getInstrLatency(const TargetSchedModel &TargetSchedModel,
+                                 const MachineInstr &MI) const {
+  if (TargetSchedModel.hasInstrSchedModel()) {
+    const MCSchedClassDesc *SCDesc = TargetSchedModel.resolveSchedClass(&MI);
+    if (SCDesc->isValid())
+      return TargetSchedModel.computeInstrLatency(*SCDesc);
+  }
+
+  return std::nullopt;
+}
+
+std::optional<unsigned> TargetInstrInfo::getOperandLatency(const TargetSchedModel &SchedModel,
+                                                           const MachineInstr *DefMI,
+                                                           unsigned DefOperIdx,
+                                                           const MachineInstr *UseMI,
+                                                           unsigned UseOperIdx) const {
+  // Only handle the TargetSchedModel-based computation here. If no
+  // instruction scheduling model is available, defer to the caller.
+  if (!SchedModel.hasInstrSchedModel())
+    return std::nullopt;
+
+  const MCSchedClassDesc *SCDesc = SchedModel.resolveSchedClass(DefMI);
+  if (!SCDesc->isValid())
+    return std::nullopt;
+
+  // Compute DefIdx from operand index.
+  unsigned DefIdx = 0;
+  for (unsigned I = 0; I != DefOperIdx; ++I) {
+    const MachineOperand &MO = DefMI->getOperand(I);
+    if (MO.isReg() && MO.isDef())
+      ++DefIdx;
+  }
+  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+    // Lookup the definition's write latency in SubtargetInfo.
+    const TargetSubtargetInfo *STI = SchedModel.getSubtargetInfo();
+    const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx);
+    unsigned WriteID = WLEntry->WriteResourceID;
+    unsigned Latency = WLEntry->Cycles >= 0 ? static_cast<unsigned>(WLEntry->Cycles) : 1000u;
+    if (!UseMI)
+      return Latency;
+
+    // Lookup the use's latency adjustment in SubtargetInfo.
+    const MCSchedClassDesc *UseDesc = SchedModel.resolveSchedClass(UseMI);
+    if (UseDesc->NumReadAdvanceEntries == 0)
+      return Latency;
+    // Compute UseIdx from operand index.
+    unsigned UseIdx = 0;
+    for (unsigned I = 0; I != UseOperIdx; ++I) {
+      const MachineOperand &MO = UseMI->getOperand(I);
+      if (MO.isReg() && MO.readsReg() && !MO.isDef())
+        ++UseIdx;
+    }
+    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    if (Advance > 0 && static_cast<unsigned>(Advance) > Latency) // unsigned wrap
+      return 0;
+    return Latency - Advance;
+  }
+
+  // If DefIdx does not exist in the model (e.g. implicit defs), then return
+  // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
+      !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
+      SchedModel.getMCSchedModel()->isComplete()) {
+    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+           << *DefMI
+           << " (Try with MCSchedModel.CompleteModel set to false)";
+    llvm_unreachable("incomplete machine model");
+  }
+#endif
+
+  // FIXME: Automatically giving all implicit defs defaultDefLatency is
+  // undesirable. We should only do it for defs that are known to the MC
+  // desc like flags. Truly implicit defs should get 1 cycle latency.
+  const MCSchedModel *MCSM = SchedModel.getMCSchedModel();
+  return DefMI->isTransient() ? 0 : defaultDefLatency(*MCSM, *DefMI);
+}
+
 bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
                                        const MachineInstr &DefMI,
                                        unsigned DefIdx) const {
diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index 7ae9e0e37bbab..210f3e247200a 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -134,37 +134,6 @@ resolveSchedClass(const MachineInstr *MI) const {
   return SCDesc;
 }
 
-/// Find the def index of this operand. This index maps to the machine model and
-/// is independent of use operands. Def operands may be reordered with uses or
-/// merged with uses without affecting the def index (e.g. before/after
-/// regalloc). However, an instruction's def operands must never be reordered
-/// with respect to each other.
-static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
-  unsigned DefIdx = 0;
-  for (unsigned i = 0; i != DefOperIdx; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isDef())
-      ++DefIdx;
-  }
-  return DefIdx;
-}
-
-/// Find the use index of this operand. This is independent of the instruction's
-/// def operands.
-///
-/// Note that uses are not determined by the operand's isUse property, which
-/// is simply the inverse of isDef. Here we consider any readsReg operand to be
-/// a "use". The machine model allows an operand to be both a Def and Use.
-static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
-  unsigned UseIdx = 0;
-  for (unsigned i = 0; i != UseOperIdx; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.readsReg() && !MO.isDef())
-      ++UseIdx;
-  }
-  return UseIdx;
-}
-
 // Top-level API for clients that know the operand indices. This doesn't need to
 // return std::optional, as it always returns a valid latency.
 unsigned TargetSchedModel::computeOperandLatency(
@@ -177,8 +146,8 @@ unsigned TargetSchedModel::computeOperandLatency(
   if (!hasInstrSchedModel() && !hasInstrItineraries())
     return DefaultDefLatency;
 
+  std::optional<unsigned> OperLatency;
   if (hasInstrItineraries()) {
-    std::optional<unsigned> OperLatency;
     if (UseMI) {
       OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
                                            *UseMI, UseOperIdx);
@@ -195,41 +164,11 @@ unsigned TargetSchedModel::computeOperandLatency(
   }
   // hasInstrSchedModel()
 
-  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
-  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
-  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
-    // Lookup the definition's write latency in SubtargetInfo.
-    const MCWriteLatencyEntry *WLEntry =
-      STI->getWriteLatencyEntry(SCDesc, DefIdx);
-    unsigned WriteID = WLEntry->WriteResourceID;
-    unsigned Latency = capLatency(WLEntry->Cycles);
-    if (!UseMI)
-      return Latency;
-
-    // Lookup the use's latency adjustment in SubtargetInfo.
-    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
-    if (UseDesc->NumReadAdvanceEntries == 0)
-      return Latency;
-    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
-    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
-    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
-      return 0;
-    return Latency - Advance;
-  }
-  // If DefIdx does not exist in the model (e.g. implicit defs), then return
-  // unit latency (defaultDefLatency may be too conservative).
-#ifndef NDEBUG
-  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
-      !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
-      SchedModel.isComplete()) {
-    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
-           << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
-    llvm_unreachable("incomplete machine model");
-  }
-#endif
-  // FIXME: Automatically giving all implicit defs defaultDefLatency is
-  // undesirable. We should only do it for defs that are known to the MC
-  // desc like flags. Truly implicit defs should get 1 cycle latency.
+  OperLatency =
+      TII->getOperandLatency(*this, DefMI, DefOperIdx, UseMI, UseOperIdx);
+  if (OperLatency)
+    return *OperLatency;
+
   return DefMI->isTransient() ? 0 : DefaultDefLatency;
 }
 
@@ -259,12 +198,11 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
       (!hasInstrSchedModel() && !UseDefaultDefLatency))
     return TII->getInstrLatency(&InstrItins, *MI);
 
-  if (hasInstrSchedModel()) {
-    const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
-    if (SCDesc->isValid())
-      return computeInstrLatency(*SCDesc);
-  }
-  return TII->defaultDefLatency(SchedModel, *MI);
+  std::optional<unsigned> InstrLatency;
+  // This is used by subtargets that define an InstrSchedModel.
+  InstrLatency = TII->getInstrLatency(*this, *MI);
+
+  return InstrLatency ? *InstrLatency : TII->defaultDefLatency(SchedModel, *MI);
 }
 
 unsigned TargetSchedModel::
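
Note for reviewers, not part of the patch: the sketch below shows how a backend might hook into the new TargetSchedModel-based getInstrLatency() virtual. FooInstrInfo and SpecialMAccOpcode are hypothetical names used only for illustration; a real target would derive from its generated <Target>GenInstrInfo rather than TargetInstrInfo directly.

// Illustrative sketch, not part of this change. Assumes a hypothetical
// target "Foo"; SpecialMAccOpcode is a stand-in for a target-defined opcode.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include <optional>

namespace llvm {

static constexpr unsigned SpecialMAccOpcode = 0x1234; // placeholder opcode

class FooInstrInfo : public TargetInstrInfo {
public:
  // Keep the other getInstrLatency overloads (e.g. the itinerary-based one)
  // visible in this class.
  using TargetInstrInfo::getInstrLatency;

  std::optional<unsigned>
  getInstrLatency(const TargetSchedModel &SchedModel,
                  const MachineInstr &MI) const override {
    // Hypothetical special case: an instruction whose latency the scheduling
    // model does not describe accurately.
    if (MI.getOpcode() == SpecialMAccOpcode)
      return 4;
    // Otherwise defer to the default TargetSchedModel-based computation;
    // a std::nullopt result makes TargetSchedModel::computeInstrLatency()
    // fall back to TII->defaultDefLatency() as before.
    return TargetInstrInfo::getInstrLatency(SchedModel, MI);
  }
};

} // namespace llvm

Returning std::nullopt rather than a sentinel value is what lets TargetSchedModel::computeInstrLatency() and computeOperandLatency() keep their existing fallback behaviour when a target has nothing special to report.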