diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3f18bd70539a0..106cde352e0b5 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5578,7 +5578,7 @@ class BoUpSLP { // Decrement the unscheduled counter and insert to ready list if // ready. auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE, - unsigned OpIdx) { + unsigned OpIdx, bool FirstRun = false) { if (!ScheduleCopyableDataMap.empty()) { const EdgeInfo EI = {UserTE, OpIdx}; if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) { @@ -5586,6 +5586,8 @@ class BoUpSLP { return; } } + if (!FirstRun) + return; auto It = OperandsUses.find(I); assert(It != OperandsUses.end() && "Operand not found"); if (It->second > 0) { @@ -5602,37 +5604,48 @@ class BoUpSLP { break; // Need to search for the lane since the tree entry can be // reordered. - int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(), - find(Bundle->getTreeEntry()->Scalars, In)); - assert(Lane >= 0 && "Lane not set"); - if (isa(In) && - !Bundle->getTreeEntry()->ReorderIndices.empty()) - Lane = Bundle->getTreeEntry()->ReorderIndices[Lane]; - assert(Lane < static_cast( - Bundle->getTreeEntry()->Scalars.size()) && - "Couldn't find extract lane"); - - // Since vectorization tree is being built recursively this - // assertion ensures that the tree entry has all operands set before - // reaching this code. Couple of exceptions known at the moment are - // extracts where their second (immediate) operand is not added. - // Since immediates do not affect scheduler behavior this is - // considered okay. 
- assert(In && - (isa(In) || - In->getNumOperands() == - Bundle->getTreeEntry()->getNumOperands() || - Bundle->getTreeEntry()->isCopyableElement(In)) && - "Missed TreeEntry operands?"); - - for (unsigned OpIdx : - seq(Bundle->getTreeEntry()->getNumOperands())) - if (auto *I = dyn_cast( - Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) { - LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " << *I - << "\n"); - DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx); - } + auto *It = find(Bundle->getTreeEntry()->Scalars, In); + bool FirstRun = true; + do { + int Lane = + std::distance(Bundle->getTreeEntry()->Scalars.begin(), It); + assert(Lane >= 0 && "Lane not set"); + if (isa(In) && + !Bundle->getTreeEntry()->ReorderIndices.empty()) + Lane = Bundle->getTreeEntry()->ReorderIndices[Lane]; + assert(Lane < static_cast( + Bundle->getTreeEntry()->Scalars.size()) && + "Couldn't find extract lane"); + + // Since vectorization tree is being built recursively this + // assertion ensures that the tree entry has all operands set + // before reaching this code. Couple of exceptions known at the + // moment are extracts where their second (immediate) operand is + // not added. Since immediates do not affect scheduler behavior + // this is considered okay. + assert(In && + (isa(In) || + In->getNumOperands() == + Bundle->getTreeEntry()->getNumOperands() || + Bundle->getTreeEntry()->isCopyableElement(In)) && + "Missed TreeEntry operands?"); + + for (unsigned OpIdx : + seq(Bundle->getTreeEntry()->getNumOperands())) + if (auto *I = dyn_cast( + Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) { + LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " + << *I << "\n"); + DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, + FirstRun); + } + // If parent node is schedulable, it will be handled correctly. 
+ if (!Bundle->getTreeEntry()->doesNotNeedToSchedule()) + break; + It = std::find(std::next(It), + Bundle->getTreeEntry()->Scalars.end(), In); + FirstRun = false; + } while (It != Bundle->getTreeEntry()->Scalars.end()); } } else { // If BundleMember is a stand-alone instruction, no operand reordering diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll new file mode 100644 index 0000000000000..7accca311af3c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB6:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ , %[[BB]] ], [ , %[[BB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: ret void +; +bb: + br i1 false, label %bb1, label %bb6 + +bb1: + %add = add i32 0, 0 + %shl = shl i32 %add, 0 + %sub = sub i32 0, 1 + %add2 = add i32 %sub, %shl + %add3 = add i32 0, 0 + %shl4 = shl i32 %add3, 0 + %ashr = ashr i32 %shl4, 1 + %add5 = add i32 0, 0 + br label %bb6 + +bb6: + %phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ] + %phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ] + %phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ] + %phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ] + %or = or i32 %phi8, 0 + ret void +}