Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));

/// Upper bound on the ratio of cloned instructions to original instructions
/// tolerated by TransformDFA::isLegalAndProfitableToTransform(); when the
/// ratio exceeds this value the switch is not threaded. Hidden option,
/// defaults to 7.5.
static cl::opt<double> MaxClonedRate(
"dfa-max-cloned-rate",
cl::desc(
"Maximum cloned instructions rate accepted for the transformation"),
cl::Hidden, cl::init(7.5));

namespace {

class SelectInstToUnfold {
Expand Down Expand Up @@ -828,6 +834,7 @@ struct TransformDFA {
/// also returns false if it is illegal to clone some required block.
bool isLegalAndProfitableToTransform() {
CodeMetrics Metrics;
uint64_t NumClonedInst = 0;
SwitchInst *Switch = SwitchPaths->getSwitchInst();

// Don't thread switch without multiple successors.
Expand All @@ -837,7 +844,6 @@ struct TransformDFA {
// Note that DuplicateBlockMap is not being used as intended here. It is
// just being used to ensure (BB, State) pairs are only counted once.
DuplicateBlockMap DuplicateMap;

for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
PathType PathBBs = TPath.getPath();
APInt NextState = TPath.getExitValue();
Expand All @@ -848,6 +854,7 @@ struct TransformDFA {
BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
if (!VisitedBB) {
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
NumClonedInst += BB->sizeWithoutDebug();
DuplicateMap[BB].push_back({BB, NextState});
}

Expand All @@ -865,6 +872,7 @@ struct TransformDFA {
if (VisitedBB)
continue;
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
NumClonedInst += BB->sizeWithoutDebug();
DuplicateMap[BB].push_back({BB, NextState});
}

Expand Down Expand Up @@ -901,6 +909,22 @@ struct TransformDFA {
}
}

// Cloning too many instructions slows down later optimizations, especially
// SLPVectorizer.
// TODO: Thread the switch partially before reaching the threshold.
uint64_t NumOrigInst = 0;
for (auto *BB : DuplicateMap.keys())
NumOrigInst += BB->sizeWithoutDebug();
if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
"instructions wll be cloned\n");
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
<< "Too much instructions will be cloned.";
});
return false;
}

InstructionCost DuplicationCost = 0;

unsigned JumpTableSize = 0;
Expand Down