Skip to content

Commit

Permalink
Adjust MachineScheduler to use ProcResource counts
Browse files Browse the repository at this point in the history
This fix allows the scheduler to take into account the number of instances of
each ProcResource specified. Previously, a scheduler model declaring
ProcResource<1> was treated identically to one declaring ProcResource<2>.
Now the hazard recognizer reports a hazard only when all of the resource's
instances are busy.

Patch by Jackson Woodruff and Momchil Velikov.

Differential Revision: https://reviews.llvm.org/D51160

llvm-svn: 360441
  • Loading branch information
momchil-velikov authored and MrSidims committed May 17, 2019
1 parent 2020fb7 commit a5d700c
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 19 deletions.
10 changes: 9 additions & 1 deletion llvm/include/llvm/CodeGen/MachineScheduler.h
Expand Up @@ -666,6 +666,10 @@ class SchedBoundary {
// scheduled instruction.
SmallVector<unsigned, 16> ReservedCycles;

// For each PIdx, stores first index into ReservedCycles that corresponds to
// it.
SmallVector<unsigned, 16> ReservedCyclesIndex;

#ifndef NDEBUG
// Remember the greatest possible stall as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
Expand Down Expand Up @@ -740,7 +744,11 @@ class SchedBoundary {
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);

unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
unsigned Cycles);

std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
unsigned Cycles);

bool checkHazard(SUnit *SU);

Expand Down
72 changes: 55 additions & 17 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Expand Up @@ -1863,6 +1863,7 @@ void SchedBoundary::reset() {
ZoneCritResIdx = 0;
IsResourceLimited = false;
ReservedCycles.clear();
ReservedCyclesIndex.clear();
#ifndef NDEBUG
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
Expand Down Expand Up @@ -1901,8 +1902,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
SchedModel = smodel;
Rem = rem;
if (SchedModel->hasInstrSchedModel()) {
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
ReservedCyclesIndex.resize(ResourceCount);
ExecutedResCounts.resize(ResourceCount);
unsigned NumUnits = 0;

for (unsigned i = 0; i < ResourceCount; ++i) {
ReservedCyclesIndex[i] = NumUnits;
NumUnits += SchedModel->getProcResource(i)->NumUnits;
}

ReservedCycles.resize(NumUnits, InvalidCycle);
}
}

Expand All @@ -1923,11 +1933,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
return 0;
}

/// Compute the next cycle at which the given processor resource can be
/// scheduled.
unsigned SchedBoundary::
getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
unsigned NextUnreserved = ReservedCycles[PIdx];
/// Compute the next cycle at which the given processor resource unit
/// can be scheduled.
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
unsigned Cycles) {
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
// If this resource has never been used, always return cycle zero.
if (NextUnreserved == InvalidCycle)
return 0;
Expand All @@ -1937,6 +1947,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
return NextUnreserved;
}

/// Find the earliest cycle at which some unit of processor resource \p PIdx
/// can be scheduled for \p Cycles.
///
/// Every unit belonging to \p PIdx is queried through
/// getNextResourceCycleByInstance() and the smallest result wins; when several
/// units report the same cycle, the lowest-indexed one is kept.
///
/// \returns a pair of (next available cycle, index of the chosen unit in the
/// ReservedCycles vector).
std::pair<unsigned, unsigned>
SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
  // Units of a resource occupy a contiguous slice of ReservedCycles that
  // starts at ReservedCyclesIndex[PIdx].
  const unsigned FirstUnit = ReservedCyclesIndex[PIdx];
  const unsigned UnitCount = SchedModel->getProcResource(PIdx)->NumUnits;
  assert(UnitCount > 0 &&
         "Cannot have zero instances of a ProcResource");

  unsigned BestCycle = InvalidCycle;
  unsigned BestUnit = 0;
  for (unsigned Offset = 0; Offset != UnitCount; ++Offset) {
    const unsigned Unit = FirstUnit + Offset;
    const unsigned Candidate = getNextResourceCycleByInstance(Unit, Cycles);
    // Strictly-less comparison: the first unit reaching the minimum is kept.
    if (Candidate < BestCycle) {
      BestCycle = Candidate;
      BestUnit = Unit;
    }
  }
  return {BestCycle, BestUnit};
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
Expand Down Expand Up @@ -1978,14 +2011,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
unsigned NRCycle, InstanceIdx;
std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
<< SchedModel->getResourceName(ResIdx) << "="
<< NRCycle << "c\n");
<< SchedModel->getResourceName(ResIdx)
<< '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
<< "=" << NRCycle << "c\n");
return true;
}
}
Expand Down Expand Up @@ -2140,10 +2175,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
<< "c\n");
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
unsigned NextAvailable, InstanceIdx;
std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getProcResource(PIdx)->Name
<< SchedModel->getResourceName(PIdx)
<< '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
<< " reserved until @" << NextAvailable << "\n");
}
return NextAvailable;
Expand Down Expand Up @@ -2233,12 +2270,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
unsigned ReservedUntil, InstanceIdx;
std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
if (isTop()) {
ReservedCycles[PIdx] =
std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
}
else
ReservedCycles[PIdx] = NextCycle;
ReservedCycles[InstanceIdx] =
std::max(ReservedUntil, NextCycle + PI->Cycles);
} else
ReservedCycles[InstanceIdx] = NextCycle;
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
Expand Up @@ -205,7 +205,9 @@ entry:

; CHECK-LABEL: aes_load_store:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VA]], [[VA]]
; aese and aesmc are described as sharing a unit, so they won't be scheduled in
; the same cycle, and the scheduler can place another instruction in between.
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK-NOT: aesmc
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/ARM/proc-resource-sched.ll
@@ -0,0 +1,21 @@
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-r52 -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-R52
; REQUIRES: asserts

; source_filename = "sched-2.c"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
entry:
%add = add nsw i32 %b, %a
%add1 = add nsw i32 %d, %c
%div = sdiv i32 %add, %add1
ret i32 %div
}

; The Cortex-R52 model describes the core as dual-issue with two integer ALUs,
; so it should be able to issue the two additions in the same cycle.
; CHECK-R52: MI Scheduling
; CHECK-R52: Cycle: 14
; CHECK-R52: Scheduling SU(5) %5:gpr = nsw ADDrr %3:gpr, %2:gpr, 14, $noreg, $noreg
; CHECK-R52: Scheduling SU(4) %4:gpr = nsw ADDrr %1:gpr, %0:gpr, 14, $noreg, $noreg
; CHECK-R52: Cycle: 15

0 comments on commit a5d700c

Please sign in to comment.