From 450737fba25203c8dfeca036925e9af9d6c22f0a Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Tue, 25 Feb 2025 19:37:46 -0800 Subject: [PATCH] [TargetInstrInfo] Add target hook for InstrSchedModel latency. [NFCI] These hooks already exist when using instruction itineraries for latency info, this patch adds them for the newer TargetSchedModel. Allows targets to dynamically set latency values in the DAG builder. This is useful in multi-pass schedulers like in the AMDGUP backend where we may want to schedule a region multiple times with a different machine model or tweaked latencies for a specific instruction type. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 12 +++ llvm/include/llvm/CodeGen/TargetSchedule.h | 3 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 79 +++++++++++++++++++ llvm/lib/CodeGen/TargetSchedule.cpp | 84 +++------------------ 4 files changed, 103 insertions(+), 75 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 6a624a7052cdd..6589f85937724 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1848,6 +1848,13 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const; + /// Compute the latency of a register data dependence (DefIdx -> UseIdx) + /// using the TargetSchedModel. + virtual std::optional + getOperandLatency(const TargetSchedModel &SchedModel, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + /// Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via /// PredCost. @@ -1855,6 +1862,11 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { const MachineInstr &MI, unsigned *PredCost = nullptr) const; + /// Compute the instruction latency using the TargetSchedModel. 
+ virtual std::optional<unsigned> + getInstrLatency(const TargetSchedModel &TargetSchedModel,
/// diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index b0009560d3fcb..70c9d86e29471 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1830,6 +1830,85 @@ unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return ItinData->getStageLatency(MI.getDesc().getSchedClass()); } +std::optional +TargetInstrInfo::getInstrLatency(const TargetSchedModel &TargetSchedModel, + const MachineInstr &MI) const { + if (TargetSchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = TargetSchedModel.resolveSchedClass(&MI); + if (SCDesc->isValid()) + return TargetSchedModel.computeInstrLatency(*SCDesc); + } + + return std::nullopt; +} + +std::optional TargetInstrInfo::getOperandLatency(const TargetSchedModel &SchedModel, + const MachineInstr *DefMI, + unsigned DefOperIdx, + const MachineInstr *UseMI, + unsigned UseOperIdx) const { + // Only handle the TargetSchedModel-based computation here. If no + // instruction scheduling model is available, defer to the caller. + if (!SchedModel.hasInstrSchedModel()) + return std::nullopt; + + const MCSchedClassDesc *SCDesc = SchedModel.resolveSchedClass(DefMI); + if (!SCDesc->isValid()) + return std::nullopt; + + // Compute DefIdx from operand index. + unsigned DefIdx = 0; + for (unsigned I = 0; I != DefOperIdx; ++I) { + const MachineOperand &MO = DefMI->getOperand(I); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const TargetSubtargetInfo *STI = SchedModel.getSubtargetInfo(); + const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = WLEntry->Cycles >= 0 ? static_cast(WLEntry->Cycles) : 1000u; + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. 
+ const MCSchedClassDesc *UseDesc = SchedModel.resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + // Compute UseIdx from operand index. + unsigned UseIdx = 0; + for (unsigned I = 0; I != UseOperIdx; ++I) { + const MachineOperand &MO = UseMI->getOperand(I); + if (MO.isReg() && MO.readsReg() && !MO.isDef()) + ++UseIdx; + } + int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + if (Advance > 0 && static_cast(Advance) > Latency) // unsigned wrap + return 0; + return Latency - Advance; + } + + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && + !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() && + SchedModel.getMCSchedModel()->isComplete()) { + errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI + << " (Try with MCSchedModel.CompleteModel set to false)"; + llvm_unreachable("incomplete machine model"); + } +#endif + + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + const MCSchedModel *MCSM = SchedModel.getMCSchedModel(); + return DefMI->isTransient() ? 0 : defaultDefLatency(*MCSM, *DefMI); +} + bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, const MachineInstr &DefMI, unsigned DefIdx) const { diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp index 7ae9e0e37bbab..210f3e247200a 100644 --- a/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/llvm/lib/CodeGen/TargetSchedule.cpp @@ -134,37 +134,6 @@ resolveSchedClass(const MachineInstr *MI) const { return SCDesc; } -/// Find the def index of this operand. This index maps to the machine model and -/// is independent of use operands. 
Def operands may be reordered with uses or -/// merged with uses without affecting the def index (e.g. before/after -/// regalloc). However, an instruction's def operands must never be reordered -/// with respect to each other. -static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { - unsigned DefIdx = 0; - for (unsigned i = 0; i != DefOperIdx; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) - ++DefIdx; - } - return DefIdx; -} - -/// Find the use index of this operand. This is independent of the instruction's -/// def operands. -/// -/// Note that uses are not determined by the operand's isUse property, which -/// is simply the inverse of isDef. Here we consider any readsReg operand to be -/// a "use". The machine model allows an operand to be both a Def and Use. -static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { - unsigned UseIdx = 0; - for (unsigned i = 0; i != UseOperIdx; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.readsReg() && !MO.isDef()) - ++UseIdx; - } - return UseIdx; -} - // Top-level API for clients that know the operand indices. This doesn't need to // return std::optional, as it always returns a valid latency. unsigned TargetSchedModel::computeOperandLatency( @@ -177,8 +146,8 @@ unsigned TargetSchedModel::computeOperandLatency( if (!hasInstrSchedModel() && !hasInstrItineraries()) return DefaultDefLatency; + std::optional OperLatency; if (hasInstrItineraries()) { - std::optional OperLatency; if (UseMI) { OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx, *UseMI, UseOperIdx); @@ -195,41 +164,11 @@ unsigned TargetSchedModel::computeOperandLatency( } // hasInstrSchedModel() - const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); - unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); - if (DefIdx < SCDesc->NumWriteLatencyEntries) { - // Lookup the definition's write latency in SubtargetInfo. 
- const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - unsigned WriteID = WLEntry->WriteResourceID; - unsigned Latency = capLatency(WLEntry->Cycles); - if (!UseMI) - return Latency; - - // Lookup the use's latency adjustment in SubtargetInfo. - const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); - if (UseDesc->NumReadAdvanceEntries == 0) - return Latency; - unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); - int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); - if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap - return 0; - return Latency - Advance; - } - // If DefIdx does not exist in the model (e.g. implicit defs), then return - // unit latency (defaultDefLatency may be too conservative). -#ifndef NDEBUG - if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && - !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() && - SchedModel.isComplete()) { - errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " - << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)"; - llvm_unreachable("incomplete machine model"); - } -#endif - // FIXME: Automatically giving all implicit defs defaultDefLatency is - // undesirable. We should only do it for defs that are known to the MC - // desc like flags. Truly implicit defs should get 1 cycle latency. + OperLatency = + TII->getOperandLatency(*this, DefMI, DefOperIdx, UseMI, UseOperIdx); + if (OperLatency) + return *OperLatency; + return DefMI->isTransient() ? 
0 : DefaultDefLatency; } @@ -259,12 +198,11 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, (!hasInstrSchedModel() && !UseDefaultDefLatency)) return TII->getInstrLatency(&InstrItins, *MI); - if (hasInstrSchedModel()) { - const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); - if (SCDesc->isValid()) - return computeInstrLatency(*SCDesc); - } - return TII->defaultDefLatency(SchedModel, *MI); + std::optional InstrLatency; + // This is used by subtargets that define an InstrSchedModel. + InstrLatency = TII->getInstrLatency(*this, *MI); + + return InstrLatency ? *InstrLatency : TII->defaultDefLatency(SchedModel, *MI); } unsigned TargetSchedModel::