Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CodeLayout][NFC] Using MergedVector to avoid extra vector allocations #68724

Merged
merged 4 commits into from
Oct 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 77 additions & 62 deletions llvm/lib/Transforms/Utils/CodeLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ static cl::opt<unsigned> BackwardDistance(
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

// The maximum size of a chain created by the algorithm. The size is bounded
// so that the algorithm can efficiently process extremely large instance.
// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
cl::desc("The maximum size of a chain to create."));
Expand Down Expand Up @@ -217,8 +217,8 @@ struct NodeT {
NodeT &operator=(const NodeT &) = delete;
NodeT &operator=(NodeT &&) = default;

explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
: Index(Index), Size(Size), ExecutionCount(EC) {}
explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
: Index(Index), Size(Size), ExecutionCount(Count) {}

bool isEntry() const { return Index == 0; }

Expand Down Expand Up @@ -477,12 +477,12 @@ void ChainT::mergeEdges(ChainT *Other) {

using NodeIter = std::vector<NodeT *>::const_iterator;

/// A wrapper around three chains of nodes; it is used to avoid extra
/// instantiation of the vectors.
struct MergedChain {
MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
NodeIter End3 = NodeIter())
/// A wrapper around three concatenated vectors (chains) of nodes; it is used
/// to avoid extra instantiation of the vectors.
struct MergedNodesT {
MergedNodesT(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
NodeIter End3 = NodeIter())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}

Expand All @@ -507,6 +507,8 @@ struct MergedChain {

const NodeT *getFirstNode() const { return *Begin1; }

bool empty() const { return Begin1 == End1; }

private:
NodeIter Begin1;
NodeIter End1;
Expand All @@ -516,14 +518,34 @@ struct MergedChain {
NodeIter End3;
};

/// A wrapper around two concatenated vectors (chains) of jumps.
struct MergedJumpsT {
MergedJumpsT(const std::vector<JumpT *> *Jumps1,
const std::vector<JumpT *> *Jumps2 = nullptr) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This default value is unused.

assert(!Jumps1->empty() && "cannot merge empty jump list");
JumpArray[0] = Jumps1;
JumpArray[1] = Jumps2;
}

template <typename F> void forEach(const F &Func) const {
for (auto Jumps : JumpArray)
if (Jumps != nullptr)
for (JumpT *Jump : *Jumps)
Func(Jump);
}

private:
std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
};

/// Merge two chains of nodes respecting a given 'type' and 'offset'.
///
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
MergedChain mergeNodes(const std::vector<NodeT *> &X,
const std::vector<NodeT *> &Y, size_t MergeOffset,
MergeTypeT MergeType) {
MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
const std::vector<NodeT *> &Y, size_t MergeOffset,
MergeTypeT MergeType) {
// Split the first chain, X, into X1 and X2.
NodeIter BeginX1 = X.begin();
NodeIter EndX1 = X.begin() + MergeOffset;
Expand All @@ -535,15 +557,15 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X,
// Construct a new chain from the three existing ones.
switch (MergeType) {
case MergeTypeT::X_Y:
return MergedChain(BeginX1, EndX2, BeginY, EndY);
return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
case MergeTypeT::Y_X:
return MergedChain(BeginY, EndY, BeginX1, EndX2);
return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
case MergeTypeT::X1_Y_X2:
return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
case MergeTypeT::Y_X2_X1:
return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
case MergeTypeT::X2_X1_Y:
return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
}
llvm_unreachable("unexpected chain merge type");
}
Expand Down Expand Up @@ -618,6 +640,7 @@ class ExtTSPImpl {
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
for (NodeT &Node : AllNodes) {
// Create a chain.
AllChains.emplace_back(Node.Index, &Node);
Node.CurChain = &AllChains.back();
if (Node.ExecutionCount > 0)
Expand All @@ -630,13 +653,13 @@ class ExtTSPImpl {
for (JumpT *Jump : PredNode.OutJumps) {
NodeT *SuccNode = Jump->Target;
ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
// this edge is already present in the graph.
// This edge is already present in the graph.
if (CurEdge != nullptr) {
assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
// this is a new edge.
// This is a new edge.
AllEdges.emplace_back(Jump);
PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
Expand All @@ -649,7 +672,7 @@ class ExtTSPImpl {
/// to B are from A. Such nodes should be adjacent in the optimal ordering;
/// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
// Find fallthroughs based on edge weights.
// Find forced pairs of blocks.
for (NodeT &Node : AllNodes) {
if (SuccNodes[Node.Index].size() == 1 &&
PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
Expand Down Expand Up @@ -699,9 +722,7 @@ class ExtTSPImpl {
/// Deterministically compare pairs of chains.
auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
const ChainT *A2, const ChainT *B2) {
if (A1 != A2)
return A1->Id < A2->Id;
return B1->Id < B2->Id;
return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
};

while (HotChains.size() > 1) {
Expand Down Expand Up @@ -769,24 +790,22 @@ class ExtTSPImpl {
}

/// Compute the Ext-TSP score for a given node order and a list of jumps.
double extTSPScore(const MergedChain &MergedBlocks,
const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
return 0.0;
double extTSPScore(const MergedNodesT &Nodes,
const MergedJumpsT &Jumps) const {
uint64_t CurAddr = 0;
MergedBlocks.forEach([&](const NodeT *Node) {
Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});

double Score = 0;
for (JumpT *Jump : Jumps) {
Jumps.forEach([&](const JumpT *Jump) {
const NodeT *SrcBlock = Jump->Source;
const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
}
});
return Score;
}

Expand All @@ -798,17 +817,13 @@ class ExtTSPImpl {
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
ChainEdge *Edge) const {
if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
}

assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
// Precompute jumps between ChainPred and ChainSucc.
auto Jumps = Edge->jumps();
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
if (EdgePP != nullptr) {
Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
}
assert(!Jumps.empty() && "trying to merge chains w/o jumps");
MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);

// This object holds the best chosen gain of merging two chains.
MergeGainT Gain = MergeGainT();
Expand Down Expand Up @@ -875,19 +890,20 @@ class ExtTSPImpl {
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
const std::vector<JumpT *> &Jumps,
size_t MergeOffset, MergeTypeT MergeType) const {
auto MergedBlocks =
const MergedJumpsT &Jumps, size_t MergeOffset,
MergeTypeT MergeType) const {
MergedNodesT MergedNodes =
mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);

// Do not allow a merge that does not preserve the original entry point.
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
!MergedBlocks.getFirstNode()->isEntry())
!MergedNodes.getFirstNode()->isEntry())
return MergeGainT();

// The gain for the new chain.
auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
return MergeGainT(NewGainScore, MergeOffset, MergeType);
double NewScore = extTSPScore(MergedNodes, Jumps);
double CurScore = ChainPred->Score;
return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
}

/// Merge chain From into chain Into, update the list of active chains,
Expand All @@ -897,7 +913,7 @@ class ExtTSPImpl {
assert(Into != From && "a chain cannot be merged with itself");

// Merge the nodes.
MergedChain MergedNodes =
MergedNodesT MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());

Expand All @@ -908,8 +924,9 @@ class ExtTSPImpl {
// Update cached ext-tsp score for the new chain.
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
MergedJumpsT MergedJumps(&SelfEdge->jumps());
Into->Score = extTSPScore(MergedNodes, MergedJumps);
}

// Remove the chain from the list of active chains.
Expand Down Expand Up @@ -943,7 +960,7 @@ class ExtTSPImpl {
// Sorting chains by density in the decreasing order.
std::sort(SortedChains.begin(), SortedChains.end(),
[&](const ChainT *L, const ChainT *R) {
// Place the entry point is at the beginning of the order.
// Place the entry point at the beginning of the order.
if (L->isEntry() != R->isEntry())
return L->isEntry();

Expand Down Expand Up @@ -1163,9 +1180,9 @@ class CDSortImpl {
/// result is a pair with the first element being the gain and the second
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainEdge *Edge) const {
assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
// Precompute jumps between ChainPred and ChainSucc.
auto Jumps = Edge->jumps();
assert(!Jumps.empty() && "trying to merge chains w/o jumps");
MergedJumpsT Jumps(&Edge->jumps());
ChainT *SrcChain = Edge->srcChain();
ChainT *DstChain = Edge->dstChain();

Expand Down Expand Up @@ -1204,7 +1221,7 @@ class CDSortImpl {
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
const std::vector<JumpT *> &Jumps,
const MergedJumpsT &Jumps,
MergeTypeT MergeType) const {
// This doesn't depend on the ordering of the nodes
double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
Expand Down Expand Up @@ -1255,24 +1272,22 @@ class CDSortImpl {
}

/// Compute the change of the distance locality after merging the chains.
double distBasedLocalityGain(const MergedChain &MergedBlocks,
const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
return 0.0;
double distBasedLocalityGain(const MergedNodesT &Nodes,
const MergedJumpsT &Jumps) const {
uint64_t CurAddr = 0;
MergedBlocks.forEach([&](const NodeT *Node) {
Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});

double CurScore = 0;
double NewScore = 0;
for (const JumpT *Arc : Jumps) {
uint64_t SrcAddr = Arc->Source->EstimatedAddr + Arc->Offset;
uint64_t DstAddr = Arc->Target->EstimatedAddr;
NewScore += distScore(SrcAddr, DstAddr, Arc->ExecutionCount);
CurScore += distScore(0, TotalSize, Arc->ExecutionCount);
}
Jumps.forEach([&](const JumpT *Jump) {
uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
uint64_t DstAddr = Jump->Target->EstimatedAddr;
NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
});
return NewScore - CurScore;
}

Expand All @@ -1283,7 +1298,7 @@ class CDSortImpl {
assert(Into != From && "a chain cannot be merged with itself");

// Merge the nodes.
MergedChain MergedNodes =
MergedNodesT MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());

Expand Down