diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0eb8ad8d3c93d..7a9a2a39bb65c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11315,44 +11315,91 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( VL, *this, TryCopyableElementsVectorization, /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization); + bool AreScatterAllGEPSameBlock = false; + if (!S) { + SmallVector SortedIndices; + BasicBlock *BB = nullptr; + bool IsScatterVectorizeUserTE = + UserTreeIdx.UserTE && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; + AreScatterAllGEPSameBlock = + (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() && + VL.size() > 2 && + all_of(VL, + [&BB](Value *V) { + auto *I = dyn_cast(V); + if (!I) + return doesNotNeedToBeScheduled(V); + if (!BB) + BB = I->getParent(); + return BB == I->getParent() && I->getNumOperands() == 2; + }) && + BB && + sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, + *SE, SortedIndices)); + if (!AreScatterAllGEPSameBlock) { + LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to " + "C,S,B,O, small shuffle. \n"; + dbgs() << "["; + interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; }); + dbgs() << "]\n"); + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/true, + /*TrySplitVectorize=*/true); + } + // Reset S to make it GetElementPtr kind of node. + const auto *It = find_if(VL, IsaPred); + assert(It != VL.end() && "Expected at least one GEP."); + S = getSameOpcode(*It, *TLI); + } + assert(S && "Must be valid."); + + // Don't handle vectors. + if (!SLPReVec && getValueType(VL.front())->isVectorTy()) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); + // Do not try to pack to avoid extra instructions here. + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/false); + } + + // Check that all of the users of the scalars that we want to vectorize are + // schedulable. + BasicBlock *BB = S.getMainOp()->getParent(); + + if (BB->isEHPad() || isa_and_nonnull(BB->getTerminator()) || + !DT->isReachableFromEntry(BB)) { + // Don't go into unreachable blocks. They may contain instructions with + // dependency cycles which confuse the final scheduling. + // Do not vectorize EH and non-returning blocks, not profitable in most + // cases. + LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); + } + // Don't go into catchswitch blocks, which can happen with PHIs. // Such blocks can only have PHIs and the catchswitch. There is no // place to insert a shuffle if we need to, so just avoid that issue. - if (S && isa(S.getMainOp()->getParent()->getTerminator())) { + if (isa(BB->getTerminator())) { LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); // Do not try to pack to avoid extra instructions here. return ScalarsVectorizationLegality(S, /*IsLegal=*/false, /*TryToFindDuplicates=*/false); } - // Check if this is a duplicate of another entry. - if (S) { - LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n"); - for (TreeEntry *E : getTreeEntries(S.getMainOp())) { - if (E->isSame(VL)) { - LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp() - << ".\n"); - return ScalarsVectorizationLegality(S, /*IsLegal=*/false); - } - SmallPtrSet Values(llvm::from_range, E->Scalars); - if (all_of(VL, [&](Value *V) { - return isa(V) || Values.contains(V) || - (S.getOpcode() == Instruction::PHI && isa(V) && - LI->getLoopFor(S.getMainOp()->getParent()) && - isVectorized(V)); - })) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); - return ScalarsVectorizationLegality(S, /*IsLegal=*/false); - } - } + // Don't handle scalable vectors + if (S.getOpcode() == Instruction::ExtractElement && + isa( + cast(S.getMainOp())->getVectorOperandType())) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of // a load), in which case peek through to include it in the tree, without // ballooning over-budget. if (Depth >= RecursionMaxDepth && - !(S && !S.isAltShuffle() && VL.size() >= 4 && - (match(S.getMainOp(), m_Load(m_Value())) || + (S.isAltShuffle() || VL.size() < 4 || + !(match(S.getMainOp(), m_Load(m_Value())) || all_of(VL, [&S](const Value *I) { return match(I, m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) && @@ -11362,20 +11409,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } - // Don't handle scalable vectors - if (S && S.getOpcode() == Instruction::ExtractElement && - isa( - cast(S.getMainOp())->getVectorOperandType())) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); - return ScalarsVectorizationLegality(S, /*IsLegal=*/false); - } - - // Don't handle vectors. - if (!SLPReVec && getValueType(VL.front())->isVectorTy()) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - // Do not try to pack to avoid extra instructions here. - return ScalarsVectorizationLegality(S, /*IsLegal=*/false, - /*TryToFindDuplicates=*/false); + // Check if this is a duplicate of another entry. + LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n"); + for (TreeEntry *E : getTreeEntries(S.getMainOp())) { + if (E->isSame(VL)) { + LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp() + << ".\n"); + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); + } + SmallPtrSet Values(llvm::from_range, E->Scalars); + if (all_of(VL, [&](Value *V) { + return isa(V) || Values.contains(V) || + (S.getOpcode() == Instruction::PHI && isa(V) && + LI->getLoopFor(S.getMainOp()->getParent()) && + isVectorized(V)); + })) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); + } } // If all of the operands are identical or constant we have a simple solution. @@ -11434,44 +11485,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( } return true; }; - SmallVector SortedIndices; - BasicBlock *BB = nullptr; - bool IsScatterVectorizeUserTE = - UserTreeIdx.UserTE && - UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; - bool AreAllSameBlock = S.valid(); - bool AreScatterAllGEPSameBlock = - (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() && - VL.size() > 2 && - all_of(VL, - [&BB](Value *V) { - auto *I = dyn_cast(V); - if (!I) - return doesNotNeedToBeScheduled(V); - if (!BB) - BB = I->getParent(); - return BB == I->getParent() && I->getNumOperands() == 2; - }) && - BB && - sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, - SortedIndices)); + bool AreAllSameBlock = !AreScatterAllGEPSameBlock; bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock; - if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) || - (S && - isa( + if (!AreAllSameInsts || isSplat(VL) || + (isa( S.getMainOp()) && !all_of(VL, isVectorLikeInstWithConstOps)) || NotProfitableForVectorization(VL)) { - if (!S) { - LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to " - "C,S,B,O, small shuffle. \n"; - dbgs() << "["; - interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; }); - dbgs() << "]\n"); - return ScalarsVectorizationLegality(S, /*IsLegal=*/false, - /*TryToFindDuplicates=*/true, - /*TrySplitVectorize=*/true); - } LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n"; dbgs() << "["; interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; }); @@ -11480,7 +11500,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( } // Don't vectorize ephemeral values. - if (S && !EphValues.empty()) { + if (!EphValues.empty()) { for (Value *V : VL) { if (EphValues.count(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V @@ -11498,7 +11518,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( // Check that none of the instructions in the bundle are already in the tree // and the node may be not profitable for the vectorization as the small // alternate node. - if (S && S.isAltShuffle()) { + if (S.isAltShuffle()) { auto GetNumVectorizedExtracted = [&]() { APInt Extracted = APInt::getZero(VL.size()); APInt Vectorized = APInt::getAllOnes(VL.size()); @@ -11550,33 +11570,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( } } - // Special processing for sorted pointers for ScatterVectorize node with - // constant indeces only. - if (!AreAllSameBlock && AreScatterAllGEPSameBlock) { - assert(VL.front()->getType()->isPointerTy() && - count_if(VL, IsaPred) >= 2 && - "Expected pointers only."); - // Reset S to make it GetElementPtr kind of node. - const auto *It = find_if(VL, IsaPred); - assert(It != VL.end() && "Expected at least one GEP."); - S = getSameOpcode(*It, *TLI); - } - - // Check that all of the users of the scalars that we want to vectorize are - // schedulable. - Instruction *VL0 = S.getMainOp(); - BB = VL0->getParent(); - - if (S && - (BB->isEHPad() || isa_and_nonnull(BB->getTerminator()) || - !DT->isReachableFromEntry(BB))) { - // Don't go into unreachable blocks. They may contain instructions with - // dependency cycles which confuse the final scheduling. - // Do not vectorize EH and non-returning blocks, not profitable in most - // cases. - LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - return ScalarsVectorizationLegality(S, /*IsLegal=*/false); - } return ScalarsVectorizationLegality(S, /*IsLegal=*/true); }