diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index c2bdb40ff96dd..2af3c6d0ea517 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -150,6 +150,10 @@ class SchedBundle {
   DGNode *getBot() const;
   /// Move all bundle instructions to \p Where back-to-back.
   void cluster(BasicBlock::iterator Where);
+  /// \Returns true if all nodes in the bundle are ready.
+  bool ready() const {
+    return all_of(Nodes, [](const auto *N) { return N->ready(); });
+  }
 #ifndef NDEBUG
   void dump(raw_ostream &OS) const;
   LLVM_DUMP_METHOD void dump() const;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
index ad46683d95063..4893d9177eead 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
@@ -125,36 +125,89 @@ SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) {
 void Scheduler::eraseBundle(SchedBundle *SB) { Bndls.erase(SB); }
 
 bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) {
-  // Use a set of instructions, instead of `Instrs` for fast lookups.
-  DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end());
-  // This collects the nodes that correspond to instructions found in `Instrs`
-  // that have just become ready. These nodes won't be scheduled right away.
-  SmallVector<DGNode *, 8> DeferredNodes;
-
+  // Create a bundle for Instrs. If it turns out the schedule is infeasible we
+  // will dismantle it.
+  auto *InstrsSB = createBundle(Instrs);
   // Keep scheduling ready nodes until we either run out of ready nodes (i.e.,
-  // ReadyList is empty), or all nodes that correspond to `Instrs` (the nodes of
-  // which are collected in DeferredNodes) are all ready to schedule.
-  while (!ReadyList.empty()) {
-    auto *ReadyN = ReadyList.pop();
-    if (InstrsToDefer.contains(ReadyN->getInstruction())) {
-      // If the ready instruction is one of those in `Instrs`, then we don't
-      // schedule it right away. Instead we defer it until we can schedule it
-      // along with the rest of the instructions in `Instrs`, at the same
-      // time in a single scheduling bundle.
-      DeferredNodes.push_back(ReadyN);
-      bool ReadyToScheduleDeferred = DeferredNodes.size() == Instrs.size();
-      if (ReadyToScheduleDeferred) {
-        scheduleAndUpdateReadyList(*createBundle(Instrs));
+  // ReadyList is empty), or all nodes that correspond to `Instrs` (i.e., the
+  // nodes of InstrsSB) are ready and get scheduled together as one bundle.
+  SmallVector<DGNode *> Retry;
+  bool KeepScheduling = true;
+  while (KeepScheduling) {
+    enum class TryScheduleRes {
+      Success,  ///< We successfully scheduled the bundle.
+      Failure,  ///< We failed to schedule the bundle.
+      Finished, ///< We successfully scheduled the bundle and it is the last
+                ///< bundle to be scheduled.
+    };
+    /// TryScheduleBndl() attempts to schedule all DAG nodes in the bundle that
+    /// ReadyN is in. If it's not in a bundle it will create a singleton bundle
+    /// and will try to schedule it.
+    auto TryScheduleBndl = [this, InstrsSB](DGNode *ReadyN) -> TryScheduleRes {
+      auto *SB = ReadyN->getSchedBundle();
+      if (SB == nullptr) {
+        // If ReadyN does not belong to a bundle, create a singleton bundle
+        // and schedule it.
+        auto *SingletonSB = createBundle({ReadyN->getInstruction()});
+        scheduleAndUpdateReadyList(*SingletonSB);
+        return TryScheduleRes::Success;
+      }
+      if (SB->ready()) {
+        // Remove the rest of the bundle from the ready list.
+        // TODO: Perhaps change the Scheduler + ReadyList to operate on
+        // SchedBundles instead of DGNodes.
+        for (auto *N : *SB) {
+          if (N != ReadyN)
+            ReadyList.remove(N);
+        }
+        // All nodes in the bundle are ready, so schedule them together.
+        scheduleAndUpdateReadyList(*SB);
+        if (SB == InstrsSB)
+          // We just scheduled InstrsSB bundle, so we are done scheduling.
+          return TryScheduleRes::Finished;
+        return TryScheduleRes::Success;
+      }
+      return TryScheduleRes::Failure;
+    };
+    while (!ReadyList.empty()) {
+      auto *ReadyN = ReadyList.pop();
+      auto Res = TryScheduleBndl(ReadyN);
+      switch (Res) {
+      case TryScheduleRes::Success:
+        // We successfully scheduled ReadyN, keep scheduling.
+        continue;
+      case TryScheduleRes::Failure:
+        // We failed to schedule ReadyN, defer it to later and keep scheduling
+        // other ready instructions.
+        Retry.push_back(ReadyN);
+        continue;
+      case TryScheduleRes::Finished:
+        // We successfully scheduled the instruction bundle, so we are done.
         return true;
       }
-    } else {
-      // If the ready instruction is not found in `Instrs`, then we wrap it in a
-      // scheduling bundle and schedule it right away.
-      scheduleAndUpdateReadyList(*createBundle({ReadyN->getInstruction()}));
+      llvm_unreachable("Unhandled TrySchedule() result");
+    }
+    // Try to schedule nodes from the Retry list.
+    KeepScheduling = false;
+    for (auto It = Retry.begin(); It != Retry.end();) {
+      switch (TryScheduleBndl(*It)) {
+      case TryScheduleRes::Success:
+        // Erase through the iterator: erasing while a range-based loop is
+        // running would skip elements and iterate past the new end.
+        It = Retry.erase(It);
+        KeepScheduling = true;
+        break;
+      case TryScheduleRes::Failure:
+        ++It;
+        break;
+      case TryScheduleRes::Finished:
+        // InstrsSB got scheduled while retrying, so we are done.
+        return true;
+      }
     }
   }
-  assert(DeferredNodes.size() != Instrs.size() &&
-         "We should have succesfully scheduled and early-returned!");
+
+  eraseBundle(InstrsSB);
   return false;
 }
 
@@ -275,6 +328,7 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
     // If one or more instrs are already scheduled we need to destroy the
     // top-most part of the schedule that includes the instrs in the bundle and
     // re-schedule.
+    DAG.extend(Instrs);
     trimSchedule(Instrs);
     ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
     return tryScheduleUntil(Instrs);
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 531ed8cb618fc..6baffebd65edc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -360,8 +360,8 @@ define void @vecInstrsPlacement(ptr %ptr0) {
 ; CHECK-SAME: ptr [[PTR0:%.*]]) {
 ; CHECK-NEXT:    [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
-; CHECK-NEXT:    [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
-; CHECK-NEXT:    [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT:    [[VEC2:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
+; CHECK-NEXT:    [[VEC:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
 ; CHECK-NEXT:    [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
 ; CHECK-NEXT:    store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 5b9177ba4b3bf..5306661f13fa6 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-LABEL: define void @check_dag_scheduler_update(
 ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
 ; CHECK-NEXT:    [[I:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
 ; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
 ; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
 ; CHECK-NEXT:    [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
-; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
+; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
 ; CHECK-NEXT:    [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
-; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
+; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
+; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
 ; CHECK-NEXT:    [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
 ; CHECK-NEXT:    store <4 x i32> [[VEC]], ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
@@ -56,9 +56,9 @@ define <4 x float> @check_top_of_schedule(ptr %0) {
 ; CHECK-LABEL: define <4 x float> @check_top_of_schedule(
 ; CHECK-SAME: ptr [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
 ; CHECK-NEXT:    [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
 ; CHECK-NEXT:    store <2 x double> , ptr [[GEP_1]], align 8
 ; CHECK-NEXT:    ret <4 x float> [[INS_2]]
 ;
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
index f827bd7424a06..48703dfc72eb8 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
@@ -324,6 +324,53 @@ define void @foo(ptr noalias %ptr0, ptr noalias %ptr1, i8 %arg) {
   EXPECT_TRUE(Sched.trySchedule({L0, L1}));
 }
 
+// Make sure that instructions in SchedBundles are always scheduled
+// back-to-back.
+TEST_F(SchedulerTest, SchedBundleBackToBack) {
+  parseIR(C, R"IR(
+define void @foo(ptr %ptr, i16 %arg) {
+  %gep0 = getelementptr i32, ptr %ptr, i64 0
+  %gep1 = getelementptr i32, ptr %ptr, i64 1
+  %zextX = zext i16 0 to i32
+  %zext1 = zext i16 0 to i32
+  %zext0 = zext i16 %arg to i32
+  %shl1 = shl i32 %zextX, 0
+  %shl0 = shl i32 %zext1, 0
+  %sub1 = sub i32 %zext1, %shl1
+  %sub0 = sub i32 %zext0, %shl0
+  store i32 %sub1, ptr %gep1
+  store i32 %sub0, ptr %gep0
+  ret void
+})IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto *F = Ctx.createFunction(LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  [[maybe_unused]] auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
+  [[maybe_unused]] auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
+  [[maybe_unused]] auto *ZextX = cast<sandboxir::CastInst>(&*It++);
+  auto *Zext1 = cast<sandboxir::CastInst>(&*It++);
+  auto *Zext0 = cast<sandboxir::CastInst>(&*It++);
+  auto *Shl1 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *Shl0 = cast<sandboxir::BinaryOperator>(&*It++);
+  [[maybe_unused]] auto *Sub1 = cast<sandboxir::BinaryOperator>(&*It++);
+  [[maybe_unused]] auto *Sub0 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+  auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+
+  sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx);
+  EXPECT_TRUE(Sched.trySchedule({S0, S1}));
+  EXPECT_TRUE(Sched.trySchedule({Zext0, Zext1}));
+  EXPECT_TRUE(Sched.trySchedule({Shl0, Shl1}));
+  auto BackToBack = [](sandboxir::Instruction *I1, sandboxir::Instruction *I2) {
+    return I1->getNextNode() == I2 || I2->getNextNode() == I1;
+  };
+  EXPECT_TRUE(BackToBack(S0, S1));
+  EXPECT_TRUE(BackToBack(Zext0, Zext1));
+  EXPECT_TRUE(BackToBack(Shl0, Shl1));
+}
+
 // Test that an instruction can't belong in two bundles!
 TEST_F(SchedulerTest, CheckBundles) {
   parseIR(C, R"IR(