Skip to content

Commit

Permalink
[SLP][NFC]Make collectValuesToDemote member of BoUpSLP to avoid using
Browse files Browse the repository at this point in the history
Expr container, NFC.

Saves the memory and may improve compile time.
  • Loading branch information
alexey-bataev committed Nov 17, 2023
1 parent 2402b14 commit 52df67b
Showing 1 changed file with 32 additions and 42 deletions.
74 changes: 32 additions & 42 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2285,6 +2285,14 @@ class BoUpSLP {
~BoUpSLP();

private:
/// Determine if a vectorized value \p V in can be demoted to
/// a smaller type with a truncation. We collect the values that will be
/// demoted in ToDemote and additional roots that require investigating in
/// Roots.
bool collectValuesToDemote(Value *V, SmallVectorImpl<Value *> &ToDemote,
SmallVectorImpl<Value *> &Roots,
DenseSet<Value *> &Visited) const;

/// Check if the operands on the edges \p Edges of the \p UserTE allows
/// reordering (i.e. the operands can be reordered because they have only one
/// user and reordarable).
Expand Down Expand Up @@ -9024,8 +9032,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
auto It = MinBWs.find(ScalarRoot);
auto It = MinBWs.find(EU.Scalar);
if (It != MinBWs.end()) {
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
unsigned Extend =
Expand Down Expand Up @@ -13059,19 +13066,20 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// Determine if a value V in a vectorizable expression Expr can be demoted to a
// smaller type with a truncation. We collect the values that will be demoted
// in ToDemote and additional roots that require investigating in Roots.
static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
SmallVectorImpl<Value *> &ToDemote,
SmallVectorImpl<Value *> &Roots) {
bool BoUpSLP::collectValuesToDemote(Value *V,
SmallVectorImpl<Value *> &ToDemote,
SmallVectorImpl<Value *> &Roots,
DenseSet<Value *> &Visited) const {
// We can always demote constants.
if (isa<Constant>(V)) {
ToDemote.push_back(V);
return true;
}

// If the value is not an instruction in the expression with only one use, it
// cannot be demoted.
// If the value is not a vectorized instruction in the expression with only
// one use, it cannot be demoted.
auto *I = dyn_cast<Instruction>(V);
if (!I || !I->hasOneUse() || !Expr.count(I))
if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
return false;

switch (I->getOpcode()) {
Expand All @@ -13095,16 +13103,16 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
!collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
if (!collectValuesToDemote(I->getOperand(0), ToDemote, Roots, Visited) ||
!collectValuesToDemote(I->getOperand(1), ToDemote, Roots, Visited))
return false;
break;

// We can demote selects if we can demote their true and false values.
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
!collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, Roots, Visited) ||
!collectValuesToDemote(SI->getFalseValue(), ToDemote, Roots, Visited))
return false;
break;
}
Expand All @@ -13114,7 +13122,7 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
if (!collectValuesToDemote(IncValue, ToDemote, Roots, Visited))
return false;
break;
}
Expand All @@ -13141,36 +13149,16 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!TreeRootIT)
return;

// If the expression is not rooted by a store, these roots should have
// external uses.
// TOSO: investigate if this can be relaxed.
SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
for (auto &EU : ExternalUses)
if (!Expr.erase(EU.Scalar))
return;
if (!Expr.empty())
return;

// Collect the scalar values of the vectorizable expression. We will use this
// context to determine which values can be demoted. If we see a truncation,
// we mark it as seeding another demotion.
for (auto &EntryPtr : VectorizableTree)
Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());

// Ensure the roots of the vectorizable tree don't form a cycle. They must
// have a single external user that is not in the vectorizable tree.
for (auto *Root : TreeRoot)
if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
return;

// Conservatively determine if we can actually truncate the roots of the
// expression. Collect the values that can be demoted in ToDemote and
// additional roots that require investigating in Roots.
SmallVector<Value *, 32> ToDemote;
SmallVector<Value *, 4> Roots;
for (auto *Root : TreeRoot)
if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
for (auto *Root : TreeRoot) {
DenseSet<Value *> Visited;
if (!collectValuesToDemote(Root, ToDemote, Roots, Visited))
return;
}

// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
Expand Down Expand Up @@ -13200,9 +13188,9 @@ void BoUpSLP::computeMinimumValueSizes() {
// maximum bit width required to store the scalar by using ValueTracking to
// compute the number of high-order bits we can truncate.
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
llvm::all_of(TreeRoot, [](Value *R) {
assert(R->hasOneUse() && "Root should have only one use!");
return isa<GetElementPtrInst>(R->user_back());
all_of(TreeRoot, [](Value *V) {
return all_of(V->users(),
[](User *U) { return isa<GetElementPtrInst>(U); });
})) {
MaxBitWidth = 8u;

Expand Down Expand Up @@ -13251,8 +13239,10 @@ void BoUpSLP::computeMinimumValueSizes() {
// If we can truncate the root, we must collect additional values that might
// be demoted as a result. That is, those seeded by truncations we will
// modify.
while (!Roots.empty())
collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
while (!Roots.empty()) {
DenseSet<Value *> Visited;
collectValuesToDemote(Roots.pop_back_val(), ToDemote, Roots, Visited);
}

// Finally, map the values we can demote to the maximum bit with we computed.
DenseMap<const TreeEntry *, bool> Signendness;
Expand Down

4 comments on commit 52df67b

@alexfh
Copy link
Contributor

@alexfh alexfh commented on 52df67b Nov 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Alexey, we started seeing spurious clang crashes after this patch. Assertions-enabled clang neither crashes nor reports any assertion failures. Stack traces change from crash to crash, but some of them look related to this patch:

 #0 0x00005652d9747b38 llvm::sys::RunSignalHandlers() (clang+0x9747b38)
 #1 0x00005652d9708176 CrashRecoverySignalHandler(int) (clang+0x9708176)
 #2 0x00007fda8c8af1c0 __restore_rt (libpthread.so.0+0x151c0)
 #3 0x00005652d955b88b llvm::ICmpInst::compare(llvm::APInt const&, llvm::APInt const&, llvm::CmpInst::Predicate) (clang+0x955b88b)
 #4 0x00005652d4a7674c llvm::IRBuilderBase::CreateIntCast(llvm::Value*, llvm::Type*, bool, llvm::Twine const&) (clang+0x4a7674c)
 #5 0x00005652d8e2cb5b llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::slpvectorizer::BoUpSLP::TreeEntry*, bool) (clang+0x8e2cb5b)
 #6 0x00005652d8e31fec llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*, void>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int>>, llvm::SmallVector<std::__u::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>, 0u>> const&, llvm::SmallVectorImpl<std::__u::pair<llvm::Value*, llvm::Value*>>&, llvm::Instruction*) (clang+0x8e31fec)
 #7 0x00005652d8e31dd7 llvm::slpvectorizer::BoUpSLP::vectorizeTree() (clang+0x8e31dd7)
 #8 0x00005652d8e408c6 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) (clang+0x8e408c6)
 #9 0x00005652d8e45255 bool tryToVectorizeSequence<llvm::Value>(llvm::SmallVectorImpl<llvm::Value*>&, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::ArrayRef<llvm::Value*>, bool)>, bool, llvm::slpvectorizer::BoUpSLP&) (clang+0x8e45255)
#10 0x00005652d8e3c69f llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (clang+0x8e3c69f)
#11 0x00005652d8e3aced llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (clang+0x8e3aced)
#12 0x00005652d8e3a606 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (clang+0x8e3a606)
#13 0x00005652d7f09792 llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (clang+0x7f09792)
#14 0x00005652d95a4ef5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (clang+0x95a4ef5)
#15 0x00005652d4d803f2 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (clang+0x4d803f2)
#16 0x00005652d95a73a9 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (clang+0x95a73a9)
#17 0x00005652d4d7b6d2 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (clang+0x4d7b6d2)
#18 0x00005652d95a44f5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (clang+0x95a44f5)
#19 0x00005652d4d77c93 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::__u::unique_ptr<llvm::raw_pwrite_stream, std::__u::default_delete<llvm::raw_pwrite_stream>>&, std::__u::unique_ptr<llvm::ToolOutputFile, std::__u::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) (clang+0x4d77c93)
#20 0x00005652d4d6e29c clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::__u::unique_ptr<llvm::raw_pwrite_stream, std::__u::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (clang+0x4d6e29c)
#21 0x00005652d4a57b86 clang::CodeGenAction::ExecuteAction() (clang+0x4a57b86)
#22 0x00005652d55fc4da clang::FrontendAction::Execute() (clang+0x55fc4da)
#23 0x00005652d556e714 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (clang+0x556e714)
#24 0x00005652d4703aa5 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (clang+0x4703aa5)
#25 0x00005652d46f75e2 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (clang+0x46f75e2)
#26 0x00005652d46f50c4 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) (clang+0x46f50c4)
#27 0x00005652d5703d5e void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__u::optional<llvm::StringRef>>, std::__u::basic_string<char, std::__u::char_traits<char>, std::__u::allocator<char>>*, bool*) const::$_0>(long) (clang+0x5703d5e)
#28 0x00005652d9707f4f llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (clang+0x9707f4f)
#29 0x00005652d570347b clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__u::optional<llvm::StringRef>>, std::__u::basic_string<char, std::__u::char_traits<char>, std::__u::allocator<char>>*, bool*) const (clang+0x570347b)
#30 0x00005652d56c2982 clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (clang+0x56c2982)
#31 0x00005652d56c2ecf clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::__u::pair<int, clang::driver::Command const*>>&, bool) const (clang+0x56c2ecf)
#32 0x00005652d56e2f70 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::__u::pair<int, clang::driver::Command const*>>&) (clang+0x56e2f70)
#33 0x00005652d46f452e clang_main(int, char**, llvm::ToolContext const&) (clang+0x46f452e)
#34 0x00005652d46f1484 main (clang+0x46f1484)
#35 0x00007fda8c740633 __libc_start_main (libc.so.6+0x61633)
#36 0x00005652d46f13ea _start (clang+0x46f13ea)

I'm working on a test case.

@alexfh
Copy link
Contributor

@alexfh alexfh commented on 52df67b Nov 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I was able to see an assertion failure on that input using ToT clang:

assert.h assertion failed at llvm-project/llvm/include/llvm/ADT/DenseMap.h:1270 in pointer llvm::DenseMapIterator<llvm::Value *, std::pair<unsigned long, bool>>::operator->() const [KeyT = llvm::Value *, ValueT = std::pair<unsigned long, bool>, KeyInfoT = llvm::DenseMapInfo<llvm::Value *>, Bucket = llvm::detail::DenseMapPair<llvm::Value *, std::pair<unsigned long, bool>>, IsConst = false]: Ptr != End && "dereferencing end() iterator"
    @     0x5607e8be16ea  absl::log_internal::LogMessage::PrepareToDie()
    @     0x5607e8be1040  absl::log_internal::LogMessage::SendToLog()
    @     0x5607e8bdfe0c  absl::log_internal::LogMessage::Flush()
    @     0x5607e8be1839  absl::log_internal::LogMessageFatal::~LogMessageFatal()
    @     0x5607e8b28fa3  __assert_fail
    @     0x5607e72e99f1  llvm::DenseMapIterator<>::operator->()
    @     0x5607e728bab2  llvm::slpvectorizer::BoUpSLP::vectorizeTree()
    @     0x5607e728fef5  llvm::slpvectorizer::BoUpSLP::vectorizeTree()
    @     0x5607e728f6ef  llvm::slpvectorizer::BoUpSLP::vectorizeTree()
    @     0x5607e72a91fa  llvm::SLPVectorizerPass::tryToVectorizeList()
    @     0x5607e72e3bbf  llvm::function_ref<>::callback_fn<>()
    @     0x5607e72b1b38  tryToVectorizeSequence<>()
    @     0x5607e72a2050  llvm::SLPVectorizerPass::vectorizeChainsInBlock()
    @     0x5607e729fc80  llvm::SLPVectorizerPass::runImpl()
    @     0x5607e729eca1  llvm::SLPVectorizerPass::run()
    @     0x5607e5242ad2  llvm::detail::PassModel<>::run()
    @     0x5607e84d1be2  llvm::PassManager<>::run()
    @     0x5607df1f83d2  llvm::detail::PassModel<>::run()
    @     0x5607e84cfd05  llvm::ModuleToFunctionPassAdaptor::run()
    @     0x5607df1f1d72  llvm::detail::PassModel<>::run()
    @     0x5607e84d0547  llvm::PassManager<>::run()
    @     0x5607df1e37e6  (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline()
    @     0x5607df1d6dc4  clang::EmitBackendOutput()
    @     0x5607dead92b3  clang::CodeGenAction::ExecuteAction()
    @     0x5607e03acf73  clang::FrontendAction::Execute()
    @     0x5607e02ba61e  clang::CompilerInstance::ExecuteAction()
    @     0x5607de39bb14  clang::ExecuteCompilerInvocation()
    @     0x5607de396bba  cc1_main()
    @     0x5607de38056a  ExecuteCC1Tool()
    @     0x5607de3807b7  llvm::function_ref<>::callback_fn<>()
    @     0x5607e05cc832  llvm::function_ref<>::callback_fn<>()
    @     0x5607e87e4bfa  llvm::CrashRecoveryContext::RunSafely()
    @     0x5607e05cc569  clang::driver::CC1Command::Execute()
    @     0x5607e055d384  clang::driver::Compilation::ExecuteCommand()
    @     0x5607e055d89e  clang::driver::Compilation::ExecuteJobs()
    @     0x5607e0589495  clang::driver::Driver::ExecuteCompilation()
    @     0x5607de37f735  clang_main()
    @     0x5607de37afe0  main
    @     0x7f6a2bf6c633  __libc_start_main
    @     0x5607de2a0d6a  _start

@alexfh
Copy link
Contributor

@alexfh alexfh commented on 52df67b Nov 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test case: https://gcc.godbolt.org/z/bfE8oajjq

I'm going to revert the commit.

@alexfh
Copy link
Contributor

@alexfh alexfh commented on 52df67b Nov 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted in af7a145.

Please sign in to comment.