From 1010fc02a4d8cf17abde4651e6fd68f28861d17d Mon Sep 17 00:00:00 2001 From: XChy Date: Thu, 2 Oct 2025 14:13:45 +0800 Subject: [PATCH 1/3] [DFAJumpThreading] Constrain the number of cloned instructions --- .../Transforms/Scalar/DFAJumpThreading.cpp | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e9a3e983bc1e2..3bd7f8e2fcf93 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -120,6 +120,12 @@ static cl::opt cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50)); +static cl::opt MaxClonedRate( + "dfa-max-cloned-rate", + cl::desc( + "Maximum cloned instructions rate accepted for the transformation"), + cl::Hidden, cl::init(7.5)); + namespace { class SelectInstToUnfold { @@ -828,6 +834,7 @@ struct TransformDFA { /// also returns false if it is illegal to clone some required block. bool isLegalAndProfitableToTransform() { CodeMetrics Metrics; + uint64_t NumClonedInst = 0; SwitchInst *Switch = SwitchPaths->getSwitchInst(); // Don't thread switch without multiple successors. @@ -837,7 +844,6 @@ struct TransformDFA { // Note that DuplicateBlockMap is not being used as intended here. It is // just being used to ensure (BB, State) pairs are only counted once. 
DuplicateBlockMap DuplicateMap; - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { PathType PathBBs = TPath.getPath(); APInt NextState = TPath.getExitValue(); @@ -848,6 +854,7 @@ struct TransformDFA { BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); if (!VisitedBB) { Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + NumClonedInst += range_size(*BB); DuplicateMap[BB].push_back({BB, NextState}); } @@ -865,6 +872,7 @@ struct TransformDFA { if (VisitedBB) continue; Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + NumClonedInst += range_size(*BB); DuplicateMap[BB].push_back({BB, NextState}); } @@ -901,6 +909,22 @@ struct TransformDFA { } } + // Too many cloned instructions slow down later optimizations, especially + // SLPVectorizer. + // TODO: Thread the switch partially before reaching the threshold. + uint64_t NumOrigInst = 0; + for (auto &[BB, _] : DuplicateMap) + NumOrigInst += range_size(*BB); + if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) { + LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much " + "instructions wll be cloned\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) + << "Too much instructions will be cloned."; + }); + return false; + } + InstructionCost DuplicationCost = 0; unsigned JumpTableSize = 0; From 3bb61a851bd5c44d268df4aae630d930c10ccb04 Mon Sep 17 00:00:00 2001 From: XChy Date: Fri, 3 Oct 2025 15:55:59 +0800 Subject: [PATCH 2/3] use keys --- llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 3bd7f8e2fcf93..0b76cd06f58e9 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -913,7 +913,7 @@ struct TransformDFA { // SLPVectorizer. // TODO: Thread the switch partially before reaching the threshold. 
uint64_t NumOrigInst = 0; - for (auto &[BB, _] : DuplicateMap) + for (auto *BB : DuplicateMap.keys()) NumOrigInst += range_size(*BB); if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) { LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much " From 2bad642faef89522d8791e6b7b8cc4340198cef5 Mon Sep 17 00:00:00 2001 From: XChy Date: Sat, 4 Oct 2025 12:17:18 +0800 Subject: [PATCH 3/3] use sizeWithoutDebug --- llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 0b76cd06f58e9..41a6c80943328 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -854,7 +854,7 @@ struct TransformDFA { BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); if (!VisitedBB) { Metrics.analyzeBasicBlock(BB, *TTI, EphValues); - NumClonedInst += range_size(*BB); + NumClonedInst += BB->sizeWithoutDebug(); DuplicateMap[BB].push_back({BB, NextState}); } @@ -872,7 +872,7 @@ struct TransformDFA { if (VisitedBB) continue; Metrics.analyzeBasicBlock(BB, *TTI, EphValues); - NumClonedInst += range_size(*BB); + NumClonedInst += BB->sizeWithoutDebug(); DuplicateMap[BB].push_back({BB, NextState}); } @@ -914,7 +914,7 @@ struct TransformDFA { // TODO: Thread the switch partially before reaching the threshold. uint64_t NumOrigInst = 0; for (auto *BB : DuplicateMap.keys()) - NumOrigInst += range_size(*BB); + NumOrigInst += BB->sizeWithoutDebug(); if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) { LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much " "instructions wll be cloned\n");