243 changes: 143 additions & 100 deletions llvm/lib/Transforms/Utils/CodeLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,22 +99,15 @@ static cl::opt<unsigned> BackwardDistance(
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

// The maximum size of a chain created by the algorithm. The size is bounded
// so that the algorithm can efficiently process extremely large instance.
// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
cl::desc("The maximum size of a chain to create."));
MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
cl::desc("The maximum size of a chain to create"));

// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worsen run-time.
static cl::opt<unsigned> ChainSplitThreshold(
"ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
cl::desc("The maximum size of a chain to apply splitting"));

// The option enables splitting (large) chains along in-coming and out-going
// jumps. This typically results in a better quality.
static cl::opt<bool> EnableChainSplitAlongJumps(
"ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
cl::desc("The maximum size of a chain to apply splitting"));
// The maximum ratio between densities of two chains for merging.
static cl::opt<double> MaxMergeDensityRatio(
"ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(128),
cl::desc("The maximum ratio between densities of two chains for merging"));

// Algorithm-specific options for CDS.
static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
Expand Down Expand Up @@ -217,11 +210,13 @@ struct NodeT {
NodeT &operator=(const NodeT &) = delete;
NodeT &operator=(NodeT &&) = default;

explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
: Index(Index), Size(Size), ExecutionCount(EC) {}
explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
: Index(Index), Size(Size), ExecutionCount(Count) {}

bool isEntry() const { return Index == 0; }

// Check if Other is a successor of the node.
bool isSuccessor(const NodeT *Other) const;
// The total execution count of outgoing jumps.
uint64_t outCount() const;

Expand Down Expand Up @@ -442,6 +437,14 @@ struct ChainEdge {
bool CacheValidBackward{false};
};

bool NodeT::isSuccessor(const NodeT *Other) const {
for (JumpT *Jump : OutJumps) {
if (Jump->Target == Other)
return true;
}
return false;
}

uint64_t NodeT::outCount() const {
uint64_t Count = 0;
for (JumpT *Jump : OutJumps)
Expand Down Expand Up @@ -475,14 +478,14 @@ void ChainT::mergeEdges(ChainT *Other) {
}
}

using NodeIter = std::vector<NodeT *>::const_iterator;
/// A wrapper around three concatenated vectors (chains) of objects; it is used
/// to avoid extra instantiation of the vectors.
template <typename ObjType> struct MergedVector {
using ObjIter = typename std::vector<ObjType *>::const_iterator;

/// A wrapper around three chains of nodes; it is used to avoid extra
/// instantiation of the vectors.
struct MergedChain {
MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
NodeIter End3 = NodeIter())
MergedVector(ObjIter Begin1, ObjIter End1, ObjIter Begin2 = ObjIter(),
ObjIter End2 = ObjIter(), ObjIter Begin3 = ObjIter(),
ObjIter End3 = ObjIter())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}

Expand All @@ -507,43 +510,56 @@ struct MergedChain {

const NodeT *getFirstNode() const { return *Begin1; }

bool empty() const { return Begin1 == End1; }

void append(ObjIter Begin, ObjIter End) {
if (Begin2 == End2) {
Begin2 = Begin;
End2 = End;
return;
}
assert(Begin3 == End3 && "cannot extend MergedVector");
Begin3 = Begin;
End3 = End;
}

private:
NodeIter Begin1;
NodeIter End1;
NodeIter Begin2;
NodeIter End2;
NodeIter Begin3;
NodeIter End3;
ObjIter Begin1;
ObjIter End1;
ObjIter Begin2;
ObjIter End2;
ObjIter Begin3;
ObjIter End3;
};

/// Merge two chains of nodes respecting a given 'type' and 'offset'.
///
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
MergedChain mergeNodes(const std::vector<NodeT *> &X,
const std::vector<NodeT *> &Y, size_t MergeOffset,
MergeTypeT MergeType) {
MergedVector<NodeT> mergeNodes(const std::vector<NodeT *> &X,
const std::vector<NodeT *> &Y,
size_t MergeOffset, MergeTypeT MergeType) {
// Split the first chain, X, into X1 and X2.
NodeIter BeginX1 = X.begin();
NodeIter EndX1 = X.begin() + MergeOffset;
NodeIter BeginX2 = X.begin() + MergeOffset;
NodeIter EndX2 = X.end();
NodeIter BeginY = Y.begin();
NodeIter EndY = Y.end();
MergedVector<NodeT>::ObjIter BeginX1 = X.begin();
MergedVector<NodeT>::ObjIter EndX1 = X.begin() + MergeOffset;
MergedVector<NodeT>::ObjIter BeginX2 = X.begin() + MergeOffset;
MergedVector<NodeT>::ObjIter EndX2 = X.end();
MergedVector<NodeT>::ObjIter BeginY = Y.begin();
MergedVector<NodeT>::ObjIter EndY = Y.end();

// Construct a new chain from the three existing ones.
switch (MergeType) {
case MergeTypeT::X_Y:
return MergedChain(BeginX1, EndX2, BeginY, EndY);
return MergedVector<NodeT>(BeginX1, EndX2, BeginY, EndY);
case MergeTypeT::Y_X:
return MergedChain(BeginY, EndY, BeginX1, EndX2);
return MergedVector<NodeT>(BeginY, EndY, BeginX1, EndX2);
case MergeTypeT::X1_Y_X2:
return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
return MergedVector<NodeT>(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
case MergeTypeT::Y_X2_X1:
return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
return MergedVector<NodeT>(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
case MergeTypeT::X2_X1_Y:
return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
return MergedVector<NodeT>(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
}
llvm_unreachable("unexpected chain merge type");
}
Expand Down Expand Up @@ -618,6 +634,10 @@ class ExtTSPImpl {
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
for (NodeT &Node : AllNodes) {
// Adjust execution counts.
Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
// Create a chain.
AllChains.emplace_back(Node.Index, &Node);
Node.CurChain = &AllChains.back();
if (Node.ExecutionCount > 0)
Expand All @@ -630,13 +650,13 @@ class ExtTSPImpl {
for (JumpT *Jump : PredNode.OutJumps) {
NodeT *SuccNode = Jump->Target;
ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
// this edge is already present in the graph.
// This edge is already present in the graph.
if (CurEdge != nullptr) {
assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
// this is a new edge.
// This is a new edge.
AllEdges.emplace_back(Jump);
PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
Expand All @@ -649,7 +669,7 @@ class ExtTSPImpl {
/// to B are from A. Such nodes should be adjacent in the optimal ordering;
/// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
// Find fallthroughs based on edge weights.
// Find forced pairs of blocks.
for (NodeT &Node : AllNodes) {
if (SuccNodes[Node.Index].size() == 1 &&
PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
Expand Down Expand Up @@ -699,28 +719,44 @@ class ExtTSPImpl {
/// Deterministically compare pairs of chains.
auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
const ChainT *A2, const ChainT *B2) {
if (A1 != A2)
return A1->Id < A2->Id;
return B1->Id < B2->Id;
return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
};

double PrevScore = 1e9;
while (HotChains.size() > 1) {
ChainT *BestChainPred = nullptr;
ChainT *BestChainSucc = nullptr;
MergeGainT BestGain;
// Iterate over all pairs of chains.
for (ChainT *ChainPred : HotChains) {
// Since the score of merging doesn't increase, we can stop early when
// the newly found merge is as good as the previous one.
if (BestGain.score() == PrevScore)
break;
// Get candidates for merging with the current chain.
for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
// Ignore loop edges.
if (ChainPred == ChainSucc)
if (Edge->isSelfEdge())
continue;

// Stop early if the combined chain violates the maximum allowed size.
if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
continue;
// Don't merge the chains if they have vastly different densities.
// We stop early if the ratio between the densities exceeds
// MaxMergeDensityRatio. Smaller values of the option result in
// fewer merges (hence, more chains), which in turn typically yields
// smaller size of the hot code section.
double minDensity =
std::min(ChainPred->density(), ChainSucc->density());
double maxDensity =
std::max(ChainPred->density(), ChainSucc->density());
assert(minDensity > 0.0 && maxDensity > 0.0 &&
"incorrectly computed chain densities");
const double Ratio = maxDensity / minDensity;
if (Ratio > MaxMergeDensityRatio)
continue;

// Compute the gain of merging the two chains.
// Compute the gain of merging the two chains
MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
if (CurGain.score() <= EPS)
continue;
Expand All @@ -732,6 +768,9 @@ class ExtTSPImpl {
BestGain = CurGain;
BestChainPred = ChainPred;
BestChainSucc = ChainSucc;
// Stop early when the merge is as good as the previous one.
if (BestGain.score() == PrevScore)
break;
}
}
}
Expand All @@ -741,6 +780,7 @@ class ExtTSPImpl {
break;

// Merge the best pair of chains.
PrevScore = BestGain.score();
mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
BestGain.mergeType());
}
Expand Down Expand Up @@ -769,24 +809,25 @@ class ExtTSPImpl {
}

/// Compute the Ext-TSP score for a given node order and a list of jumps.
double extTSPScore(const MergedChain &MergedBlocks,
const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
double extTSPScore(const MergedVector<NodeT> &Nodes,
const MergedVector<JumpT> &Jumps) const {
if (Jumps.empty() || Nodes.empty())
return 0.0;

uint64_t CurAddr = 0;
MergedBlocks.forEach([&](const NodeT *Node) {
Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});

double Score = 0;
for (JumpT *Jump : Jumps) {
Jumps.forEach([&](const JumpT *Jump) {
const NodeT *SrcBlock = Jump->Source;
const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
}
});
return Score;
}

Expand All @@ -798,17 +839,15 @@ class ExtTSPImpl {
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
ChainEdge *Edge) const {
if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
}

// Precompute jumps between ChainPred and ChainSucc.
auto Jumps = Edge->jumps();
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
if (EdgePP != nullptr) {
Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
}
MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end());
assert(!Jumps.empty() && "trying to merge chains w/o jumps");
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
if (EdgePP != nullptr)
Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end());

// This object holds the best chosen gain of merging two chains.
MergeGainT Gain = MergeGainT();
Expand Down Expand Up @@ -836,35 +875,36 @@ class ExtTSPImpl {
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));

if (EnableChainSplitAlongJumps) {
// Attach (a part of) ChainPred before the first node of ChainSucc.
for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
const NodeT *SrcBlock = Jump->Source;
if (SrcBlock->CurChain != ChainPred)
continue;
size_t Offset = SrcBlock->CurIndex + 1;
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
}
// Attach (a part of) ChainPred before the first node of ChainSucc.
for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
const NodeT *SrcBlock = Jump->Source;
if (SrcBlock->CurChain != ChainPred)
continue;
size_t Offset = SrcBlock->CurIndex + 1;
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
}

// Attach (a part of) ChainPred after the last node of ChainSucc.
for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
const NodeT *DstBlock = Jump->Target;
if (DstBlock->CurChain != ChainPred)
continue;
size_t Offset = DstBlock->CurIndex;
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
}
// Attach (a part of) ChainPred after the last node of ChainSucc.
for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
const NodeT *DstBlock = Jump->Target;
if (DstBlock->CurChain != ChainPred)
continue;
size_t Offset = DstBlock->CurIndex;
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
}

// Try to break ChainPred in various ways and concatenate with ChainSucc.
if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
// Try to split the chain in different ways. In practice, applying
// X2_Y_X1 merging is almost never provides benefits; thus, we exclude
// it from consideration to reduce the search space.
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
MergeTypeT::X2_X1_Y});
}
// In practice, applying X2_Y_X1 merging is almost never provides benefits;
// thus, we exclude it from consideration to reduce the search space.
for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
// Do not split the chain along a jump.
const NodeT *BB = ChainPred->Nodes[Offset - 1];
const NodeT *BB2 = ChainPred->Nodes[Offset];
if (BB->isSuccessor(BB2))
continue;

tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
MergeTypeT::X2_X1_Y});
}
Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
return Gain;
Expand All @@ -875,19 +915,20 @@ class ExtTSPImpl {
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
const std::vector<JumpT *> &Jumps,
const MergedVector<JumpT> &Jumps,
size_t MergeOffset, MergeTypeT MergeType) const {
auto MergedBlocks =
MergedVector<NodeT> MergedNodes =
mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);

// Do not allow a merge that does not preserve the original entry point.
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
!MergedBlocks.getFirstNode()->isEntry())
!MergedNodes.getFirstNode()->isEntry())
return MergeGainT();

// The gain for the new chain.
auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
return MergeGainT(NewGainScore, MergeOffset, MergeType);
double NewScore = extTSPScore(MergedNodes, Jumps);
double CurScore = ChainPred->Score;
return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
}

/// Merge chain From into chain Into, update the list of active chains,
Expand All @@ -897,7 +938,7 @@ class ExtTSPImpl {
assert(Into != From && "a chain cannot be merged with itself");

// Merge the nodes.
MergedChain MergedNodes =
MergedVector<NodeT> MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());

Expand All @@ -908,8 +949,10 @@ class ExtTSPImpl {
// Update cached ext-tsp score for the new chain.
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
MergedNodes = MergedVector<NodeT>(Into->Nodes.begin(), Into->Nodes.end());
MergedVector<JumpT> MergedJumps(SelfEdge->jumps().begin(),
SelfEdge->jumps().end());
Into->Score = extTSPScore(MergedNodes, MergedJumps);
}

// Remove the chain from the list of active chains.
Expand Down Expand Up @@ -1255,7 +1298,7 @@ class CDSortImpl {
}

/// Compute the change of the distance locality after merging the chains.
double distBasedLocalityGain(const MergedChain &MergedBlocks,
double distBasedLocalityGain(const MergedVector<NodeT> &MergedBlocks,
const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
return 0.0;
Expand Down Expand Up @@ -1283,7 +1326,7 @@ class CDSortImpl {
assert(Into != From && "a chain cannot be merged with itself");

// Merge the nodes.
MergedChain MergedNodes =
MergedVector<NodeT> MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());

Expand Down