Skip to content

Commit

Permalink
[SLP][NFC] Improve compile time by size analysis limit and reduction size limit.
Browse files Browse the repository at this point in the history

Use RecursionMaxDepth to limit the number of lookups in BoUpSLP::getVectorElementSize, and limit the reduction width for bool reduced values.
  • Loading branch information
alexey-bataev committed Mar 27, 2024
1 parent 5da3937 commit b7a4ace
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13928,26 +13928,29 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// that feed it. The type of the loaded value may indicate a more suitable
// width than V's type. We want to base the vector element size on the width
// of memory operations where possible.
SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
SmallVector<std::tuple<Instruction *, BasicBlock *, unsigned>> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
if (auto *I = dyn_cast<Instruction>(V)) {
Worklist.emplace_back(I, I->getParent());
Worklist.emplace_back(I, I->getParent(), 0);
Visited.insert(I);
}

// Traverse the expression tree in bottom-up order looking for loads. If we
// encounter an instruction we don't yet handle, we give up.
auto Width = 0u;
Value *FirstNonBool = nullptr;
while (!Worklist.empty()) {
Instruction *I;
BasicBlock *Parent;
std::tie(I, Parent) = Worklist.pop_back_val();
auto [I, Parent, Level] = Worklist.pop_back_val();

// We should only be looking at scalar instructions here. If the current
// instruction has a vector type, skip.
auto *Ty = I->getType();
if (isa<VectorType>(Ty))
continue;
if (Ty != Builder.getInt1Ty() && !FirstNonBool)
FirstNonBool = I;
if (Level > RecursionMaxDepth)
continue;

// If the current instruction is a load, update Width to reflect the
// width of the loaded value.
Expand All @@ -13960,11 +13963,16 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// user or the use is a PHI node, we add it to the worklist.
else if (isa<PHINode, CastInst, GetElementPtrInst, CmpInst, SelectInst,
BinaryOperator, UnaryOperator>(I)) {
for (Use &U : I->operands())
for (Use &U : I->operands()) {
if (auto *J = dyn_cast<Instruction>(U.get()))
if (Visited.insert(J).second &&
(isa<PHINode>(I) || J->getParent() == Parent))
Worklist.emplace_back(J, J->getParent());
(isa<PHINode>(I) || J->getParent() == Parent)) {
Worklist.emplace_back(J, J->getParent(), Level + 1);
continue;
}
if (!FirstNonBool && U.get()->getType() != Builder.getInt1Ty())
FirstNonBool = U.get();
}
} else {
break;
}
Expand All @@ -13974,8 +13982,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// gave up for some reason, just return the width of V. Otherwise, return the
// maximum width we found.
if (!Width) {
if (auto *CI = dyn_cast<CmpInst>(V))
V = CI->getOperand(0);
if (V->getType() == Builder.getInt1Ty() && FirstNonBool)
V = FirstNonBool;
Width = DL->getTypeSizeInBits(V->getType());
}

Expand Down Expand Up @@ -15838,7 +15846,9 @@ class HorizontalReduction {
RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize);

unsigned ReduxWidth = std::min<unsigned>(
llvm::bit_floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
llvm::bit_floor(NumReducedVals),
std::clamp<unsigned>(MaxElts, RedValsMaxNumber,
RegMaxNumber * RedValsMaxNumber));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts vectorization attempt with lower vector factor.
Expand Down

0 comments on commit b7a4ace

Please sign in to comment.