diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index 9f03a4e3a5a6f..18a2b3b25a683 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -47,38 +47,14 @@ static constexpr const char ModuleID[] = "ExegesisInfoTest"; static constexpr const char FunctionID[] = "foo"; static const Align kFunctionAlignment(4096); -// Fills the given basic block with register setup code, and returns true if -// all registers could be setup correctly. -static bool generateSnippetSetupCode(const ExegesisTarget &ET, - const MCSubtargetInfo *const MSI, - BasicBlockFiller &BBF, - const BenchmarkKey &Key, - bool GenerateMemoryInstructions) { +static bool generateRegisterSetupCode( + const ExegesisTarget &ET, const MCSubtargetInfo *const MSI, + BasicBlockFiller &BBF, ArrayRef InitialRegisterValues, + bool GenerateMemoryInstructions, Register StackPointerRegister) { bool IsSnippetSetupComplete = true; - if (GenerateMemoryInstructions) { - BBF.addInstructions(ET.generateMemoryInitialSetup()); - for (const MemoryMapping &MM : Key.MemoryMappings) { -#ifdef __linux__ - // The frontend that generates that parses the memory mapping information - // from the user should validate that the requested address is a multiple - // of the page size. Assert that this is true here. 
- assert(MM.Address % getpagesize() == 0 && - "Memory mappings need to be aligned to page boundaries."); -#endif - BBF.addInstructions(ET.generateMmap( - MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes, - ET.getAuxiliaryMemoryStartAddress() + - sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index + - SubprocessMemory::AuxiliaryMemoryOffset))); - } - BBF.addInstructions(ET.setStackRegisterToAuxMem()); - } - Register StackPointerRegister = BBF.MF.getSubtarget() - .getTargetLowering() - ->getStackPointerRegisterToSaveRestore(); - for (const RegisterValue &RV : Key.RegisterInitialValues) { + for (const RegisterValue &RV : InitialRegisterValues) { if (GenerateMemoryInstructions) { - // If we're generating memory instructions, don't load in the value for + // If we are generating memory instructions, don't load in the value for // the register with the stack pointer as it will be used later to finish // the setup. if (RV.Register == StackPointerRegister) @@ -90,23 +66,61 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, IsSnippetSetupComplete = false; BBF.addInstructions(SetRegisterCode); } - if (GenerateMemoryInstructions) { + return IsSnippetSetupComplete; +} + +static void generateMemoryMappings(const ExegesisTarget &ET, + BasicBlockFiller &BBF, + const BenchmarkKey &Key) { + BBF.addInstructions(ET.generateMemoryInitialSetup()); + for (const MemoryMapping &MM : Key.MemoryMappings) { +#ifdef __linux__ + // The frontend that generates that parses the memory mapping information + // from the user should validate that the requested address is a multiple + // of the page size. Assert that this is true here. 
+ assert(MM.Address % getpagesize() == 0 && + "Memory mappings need to be aligned to page boundaries."); +#endif + BBF.addInstructions(ET.generateMmap( + MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes, + ET.getAuxiliaryMemoryStartAddress() + + sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index + + SubprocessMemory::AuxiliaryMemoryOffset))); + } + BBF.addInstructions(ET.setStackRegisterToAuxMem()); +} + +static bool +setStackPointerRegister(const ExegesisTarget &ET, + const MCSubtargetInfo *const MSI, BasicBlockFiller &BBF, + ArrayRef InitialRegisterValues, + Register StackPointerRegister) { + bool IsSnippetSetupComplete = true; + for (const RegisterValue &RV : InitialRegisterValues) { + // Load in the stack register now as we're done using it elsewhere + // and need to set the value in preparation for executing the + // snippet. + if (RV.Register != StackPointerRegister) + continue; + const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); + if (SetRegisterCode.empty()) + IsSnippetSetupComplete = false; + BBF.addInstructions(SetRegisterCode); + break; + } + return IsSnippetSetupComplete; +} + +static bool generatePerfCounterReset( + const ExegesisTarget &ET, const MCSubtargetInfo *const MSI, + BasicBlockFiller &BBF, ArrayRef InitialRegisterValues, + Register StackPointerRegister) { + bool IsSnippetSetupComplete = true; #ifdef HAVE_LIBPFM - BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true)); + BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true)); #endif // HAVE_LIBPFM - for (const RegisterValue &RV : Key.RegisterInitialValues) { - // Load in the stack register now as we're done using it elsewhere - // and need to set the value in preparation for executing the - // snippet. 
- if (RV.Register != StackPointerRegister) - continue; - const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); - if (SetRegisterCode.empty()) - IsSnippetSetupComplete = false; - BBF.addInstructions(SetRegisterCode); - break; - } - } + IsSnippetSetupComplete = setStackPointerRegister( + ET, MSI, BBF, InitialRegisterValues, StackPointerRegister); return IsSnippetSetupComplete; } @@ -147,7 +161,7 @@ MachineFunction &createVoidVoidPtrMachineFunction(StringRef FunctionName, return MMI->getOrCreateMachineFunction(*F); } -BasicBlockFiller::BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB, +BasicBlockFiller::BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB, const MCInstrInfo *MCII) : MF(MF), MBB(MBB), MCII(MCII) {} @@ -193,17 +207,17 @@ void BasicBlockFiller::addReturn(const ExegesisTarget &ET, #endif // __linux__ } // Insert the return code. - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); if (TII->getReturnOpcode() < TII->getNumOpcodes()) { BuildMI(MBB, DL, TII->get(TII->getReturnOpcode())); } else { - MachineIRBuilder MIB(MF); + MachineIRBuilder MIB(*MF); MIB.setMBB(*MBB); FunctionLoweringInfo FuncInfo; FuncInfo.CanLowerReturn = true; - MF.getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {}, FuncInfo, - 0); + MF->getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {}, + FuncInfo, 0); } } @@ -215,7 +229,7 @@ FunctionFiller::FunctionFiller(MachineFunction &MF, BasicBlockFiller FunctionFiller::addBasicBlock() { MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); MF.push_back(MBB); - return BasicBlockFiller(MF, MBB, MCII); + return BasicBlockFiller(&MF, MBB, MCII); } ArrayRef FunctionFiller::getRegistersSetUp() const { @@ -241,11 +255,28 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM) { return MF.getSubtarget().getRegisterInfo()->getReservedRegs(MF); } +static void setMBBLiveIns(const ExegesisTarget &ET, 
MachineBasicBlock *MBB, + bool GenerateMemoryInstructions, + ArrayRef LiveIns) { + for (const unsigned Reg : LiveIns) + MBB->addLiveIn(Reg); + + if (GenerateMemoryInstructions) { + for (const unsigned Reg : ET.getArgumentRegisters()) + MBB->addLiveIn(Reg); + // Add a live in for registers that need saving so that the machine + // verifier doesn't fail if the register is never defined. + for (const unsigned Reg : ET.getRegistersNeedSaving()) + MBB->addLiveIn(Reg); + } +} + Error assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, ArrayRef LiveIns, const FillFunction &Fill, raw_pwrite_stream &AsmStream, const BenchmarkKey &Key, - bool GenerateMemoryInstructions) { + bool GenerateMemoryInstructions, + std::optional WarmupFill) { auto Context = std::make_unique(); std::unique_ptr Module = createModule(Context, TM->createDataLayout()); @@ -280,20 +311,42 @@ Error assembleToStream(const ExegesisTarget &ET, FunctionFiller Sink(MF, std::move(RegistersSetUp)); auto Entry = Sink.getEntry(); - for (const unsigned Reg : LiveIns) - Entry.MBB->addLiveIn(Reg); + setMBBLiveIns(ET, Entry.MBB, GenerateMemoryInstructions, LiveIns); - if (GenerateMemoryInstructions) { - for (const unsigned Reg : ET.getArgumentRegisters()) - Entry.MBB->addLiveIn(Reg); - // Add a live in for registers that need saving so that the machine verifier - // doesn't fail if the register is never defined. 
- for (const unsigned Reg : ET.getRegistersNeedSaving()) - Entry.MBB->addLiveIn(Reg); + bool IsSnippetSetupComplete = true; + const MCSubtargetInfo *const MSI = TM->getMCSubtargetInfo(); + + Register StackPointerRegister = MF.getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore(); + + if (GenerateMemoryInstructions) + generateMemoryMappings(ET, Entry, Key); + + BasicBlockFiller BenchmarkStartBlock = Entry; + + if (WarmupFill) { + IsSnippetSetupComplete &= generateRegisterSetupCode( + ET, MSI, Entry, Key.RegisterInitialValues, GenerateMemoryInstructions, + StackPointerRegister); + + IsSnippetSetupComplete &= setStackPointerRegister( + ET, MSI, Entry, Key.RegisterInitialValues, StackPointerRegister); + + BenchmarkStartBlock = (*WarmupFill)(Sink, false, Entry); + + setMBBLiveIns(ET, BenchmarkStartBlock.MBB, GenerateMemoryInstructions, + LiveIns); } - const bool IsSnippetSetupComplete = generateSnippetSetupCode( - ET, TM->getMCSubtargetInfo(), Entry, Key, GenerateMemoryInstructions); + IsSnippetSetupComplete &= generateRegisterSetupCode( + ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues, + GenerateMemoryInstructions, StackPointerRegister); + + if (GenerateMemoryInstructions) + IsSnippetSetupComplete &= generatePerfCounterReset( + ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues, + StackPointerRegister); // If the snippet setup is not complete, we disable liveliness tracking. This // means that we won't know what values are in the registers. @@ -301,7 +354,7 @@ Error assembleToStream(const ExegesisTarget &ET, if (!IsSnippetSetupComplete) Properties.reset(MachineFunctionProperties::Property::TracksLiveness); - Fill(Sink); + Fill(Sink, true, BenchmarkStartBlock); // prologue/epilogue pass needs the reserved registers to be frozen, this // is usually done by the SelectionDAGISel pass. 
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h index d85d7fdcf04f5..8af3399b9f09a 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.h +++ b/llvm/tools/llvm-exegesis/lib/Assembler.h @@ -44,7 +44,7 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM); // Helper to fill in a basic block. class BasicBlockFiller { public: - BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB, + BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB, const MCInstrInfo *MCII); void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc()); @@ -53,9 +53,9 @@ class BasicBlockFiller { void addReturn(const ExegesisTarget &ET, bool SubprocessCleanup, const DebugLoc &DL = DebugLoc()); - MachineFunction &MF; - MachineBasicBlock *const MBB; - const MCInstrInfo *const MCII; + MachineFunction *MF; + MachineBasicBlock *MBB; + const MCInstrInfo *MCII; }; // Helper to fill in a function. @@ -82,7 +82,8 @@ class FunctionFiller { }; // A callback that fills a function. -using FillFunction = std::function; +using FillFunction = + std::function; // Creates a temporary `void foo(char*)` function containing the provided // Instructions. Runs a set of llvm Passes to provide correct prologue and @@ -92,7 +93,8 @@ Error assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, ArrayRef LiveIns, const FillFunction &Fill, raw_pwrite_stream &AsmStreamm, const BenchmarkKey &Key, - bool GenerateMemoryInstructions); + bool GenerateMemoryInstructions, + std::optional WarmupFill); // Creates an ObjectFile in the format understood by the host. 
// Note: the resulting object keeps a copy of Buffer so it can be discarded once diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 5f08c67bfc89a..67e17b5b6acef 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -478,15 +478,22 @@ class SubProcessFunctionExecutorImpl Expected> BenchmarkRunner::assembleSnippet( const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, unsigned MinInstructions, unsigned LoopBodySize, - bool GenerateMemoryInstructions) const { + bool GenerateMemoryInstructions, unsigned MinWarmupInstructions) const { const std::vector &Instructions = BC.Key.Instructions; SmallString<0> Buffer; raw_svector_ostream OS(Buffer); + + std::optional OptionalWarmupFill = {}; + if (MinWarmupInstructions > 0) + OptionalWarmupFill = + Repetitor.Repeat(Instructions, MinWarmupInstructions, LoopBodySize, + GenerateMemoryInstructions); + if (Error E = assembleToStream( State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, GenerateMemoryInstructions), - OS, BC.Key, GenerateMemoryInstructions)) { + OS, BC.Key, GenerateMemoryInstructions, OptionalWarmupFill)) { return std::move(E); } return Buffer; @@ -495,7 +502,7 @@ Expected> BenchmarkRunner::assembleSnippet( Expected BenchmarkRunner::getRunnableConfiguration( const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize, - const SnippetRepetitor &Repetitor) const { + const SnippetRepetitor &Repetitor, unsigned WarmupMinInstructions) const { RunnableConfiguration RC; Benchmark &BenchmarkResult = RC.BenchmarkResult; @@ -519,9 +526,12 @@ BenchmarkRunner::getRunnableConfiguration( if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { const int MinInstructionsForSnippet = 4 * Instructions.size(); const int LoopBodySizeForSnippet = 2 * Instructions.size(); + // Do not include warmup 
iterations in the assembled snippet to display + // as reasonable warmup instruction minimums can easily blow up the size + // of the string. auto Snippet = assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, - LoopBodySizeForSnippet, GenerateMemoryInstructions); + LoopBodySizeForSnippet, GenerateMemoryInstructions, 0); if (Error E = Snippet.takeError()) return std::move(E); @@ -534,9 +544,9 @@ BenchmarkRunner::getRunnableConfiguration( // measurements. if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { - auto Snippet = - assembleSnippet(BC, Repetitor, BenchmarkResult.NumRepetitions, - LoopBodySize, GenerateMemoryInstructions); + auto Snippet = assembleSnippet( + BC, Repetitor, BenchmarkResult.NumRepetitions, LoopBodySize, + GenerateMemoryInstructions, WarmupMinInstructions); if (Error E = Snippet.takeError()) return std::move(E); RC.ObjectFile = getObjectFromBuffer(*Snippet); diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h index d746a0f775646..7a5fac345986c 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -63,7 +63,8 @@ class BenchmarkRunner { Expected getRunnableConfiguration(const BenchmarkCode &Configuration, unsigned NumRepetitions, unsigned LoopUnrollFactor, - const SnippetRepetitor &Repetitor) const; + const SnippetRepetitor &Repetitor, + unsigned WarmupMinInstructions) const; std::pair runConfiguration(RunnableConfiguration &&RC, @@ -116,7 +117,8 @@ class BenchmarkRunner { Expected> assembleSnippet(const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, unsigned MinInstructions, unsigned LoopBodySize, - bool GenerateMemoryInstructions) const; + bool GenerateMemoryInstructions, + unsigned MinWarmupInstructions) const; Expected writeObjectFile(StringRef Buffer, StringRef FileName) const; diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp 
b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
index cc5a045a8be5d..3100716165f23 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -26,8 +26,8 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
                       unsigned LoopBodySize,
                       bool CleanupMemory) const override {
     return [this, Instructions, MinInstructions,
-            CleanupMemory](FunctionFiller &Filler) {
-      auto Entry = Filler.getEntry();
+            CleanupMemory](FunctionFiller &Filler, bool AddReturn,
+                           BasicBlockFiller &Entry) -> BasicBlockFiller {
       if (!Instructions.empty()) {
         // Add the whole snippet at least once.
         Entry.addInstructions(Instructions);
@@ -35,7 +35,9 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
           Entry.addInstruction(Instructions[I % Instructions.size()]);
         }
       }
-      Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+      if (AddReturn)
+        Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+      return Entry;
     };
   }
 
@@ -57,9 +59,9 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
                       unsigned LoopBodySize,
                       bool CleanupMemory) const override {
     return [this, Instructions, MinInstructions, LoopBodySize,
-            CleanupMemory](FunctionFiller &Filler) {
+            CleanupMemory](FunctionFiller &Filler, bool AddReturn,
+                           BasicBlockFiller &Entry) -> BasicBlockFiller {
       const auto &ET = State.getExegesisTarget();
-      auto Entry = Filler.getEntry();
 
       // We can not use loop snippet repetitor for terminator instructions.
       for (const MCInst &Inst : Instructions) {
@@ -68,7 +70,10 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
         if (!MCID.isTerminator())
           continue;
-        Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
-        return;
+        // A warmup fill (AddReturn == false) has more code appended to the
+        // returned block, so only terminate it when a return was requested.
+        if (AddReturn)
+          Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+        return Entry;
       }
 
       auto Loop = Filler.addBasicBlock();
@@ -93,7 +98,7 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
       Entry.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
       Loop.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
       // If the snippet setup completed, then we can track liveness.
-      if (Loop.MF.getProperties().hasProperty(
+      if (Loop.MF->getProperties().hasProperty(
               MachineFunctionProperties::Property::TracksLiveness)) {
         // The live ins are: the loop counter, the registers that were setup by
         // the entry block, and entry block live ins.
@@ -112,7 +117,11 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
 
       // Set up the exit basic block.
       Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
-      Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
+
+      if (AddReturn)
+        Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
+
+      return Exit;
     };
   }
 
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 1b35fde815f11..d8540a6837416 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -268,6 +268,12 @@ static cl::opt BenchmarkRepeatCount(
              "before aggregating the results"),
     cl::cat(BenchmarkOptions), cl::init(30));
 
+static cl::opt<unsigned> WarmupMinInstructions(
+    "warmup-min-instructions",
+    cl::desc("The minimum number of snippet instructions to execute as warmup "
+             "before starting the performance counters and benchmarking it"),
+    cl::cat(BenchmarkOptions), cl::init(0));
+
 static ExitOnError ExitOnErr("llvm-exegesis error: ");
 
 // Helper function that logs the error(s) and exits.
@@ -405,8 +411,9 @@ static void runBenchmarkConfigurations( for (const std::unique_ptr &Repetitor : Repetitors) { - auto RC = ExitOnErr(Runner.getRunnableConfiguration( - Conf, NumRepetitions, LoopBodySize, *Repetitor)); + auto RC = ExitOnErr( + Runner.getRunnableConfiguration(Conf, NumRepetitions, LoopBodySize, + *Repetitor, WarmupMinInstructions)); std::optional DumpFile; if (DumpObjectToDisk.getNumOccurrences()) DumpFile = DumpObjectToDisk; @@ -481,6 +488,12 @@ void benchmarkMain() { #endif } + if (WarmupMinInstructions > 0 && + ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess) { + ExitWithError("Warmup iterations are currently only supported in the " + "subprocess execution mode."); + } + InitializeAllAsmPrinters(); InitializeAllAsmParsers(); InitializeAllExegesisTargets(); diff --git a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h index 9cf63931e6dd5..5c92dbcbb2fcc 100644 --- a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h +++ b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h @@ -81,7 +81,7 @@ class MachineFunctionGeneratorBaseTest : public ::testing::Test { BenchmarkKey Key; Key.RegisterInitialValues = RegisterInitialValues; EXPECT_FALSE(assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{}, - Fill, AsmStream, Key, false)); + Fill, AsmStream, Key, false, {})); Expected ExecFunc = ExecutableFunction::create( createTargetMachine(), getObjectFromBuffer(AsmStream.str())); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp index d2382ec0cddc4..53b46fa5cfcdf 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -44,7 +44,8 @@ class X86SnippetRepetitorTest : public X86TestBase { FunctionFiller Sink(*MF, {X86::EAX}); const auto Fill = Repetitor->Repeat(Instructions, 
kMinInstructions, kLoopBodySize, false); - Fill(Sink); + BasicBlockFiller Entry = Sink.getEntry(); + Fill(Sink, true, Entry); } static constexpr const unsigned kMinInstructions = 3;