diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 12cd21617b0d46..a3209af8b23524 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1940,6 +1940,25 @@ class TargetInstrInfo : public MCInstrInfo { virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const; + /// Optional target hook which partitions \p MBB into outlinable ranges for + /// instruction mapping purposes. Each range is defined by two iterators: + /// [start, end). + /// + /// Ranges are expected to be ordered top-down. That is, ranges closer to the + /// top of the block should come before ranges closer to the end of the block. + /// + /// Ranges cannot overlap. + /// + /// If an entire block is mappable, then its range is [MBB.begin(), MBB.end()) + /// + /// All instructions not present in an outlinable range are considered + /// illegal. + virtual SmallVector< + std::pair> + getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const { + return {std::make_pair(MBB.begin(), MBB.end())}; + } + /// Insert a custom frame for outlined functions. virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const { diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 7ce655dce8e34c..e74264276d4c1f 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -258,6 +258,10 @@ struct InstructionMapper { if (!TII.isMBBSafeToOutlineFrom(MBB, Flags)) return; + auto Ranges = TII.getOutlinableRanges(MBB, Flags); + if (Ranges.empty()) + return; + // Store info for the MBB for later outlining. MBBFlagsMap[&MBB] = Flags; @@ -280,34 +284,47 @@ struct InstructionMapper { std::vector UnsignedVecForMBB; std::vector InstrListForMBB; - for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) { - // Keep track of where this instruction is in the module. - switch (TII.getOutliningType(It, Flags)) { - case InstrType::Illegal: + for (auto &Range : Ranges) { + auto RangeStart = Range.first; + auto RangeEnd = Range.second; + // Everything outside of an outlinable range is illegal. + for (; It != RangeStart; ++It) mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, InstrListForMBB); - break; - - case InstrType::Legal: - mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, - NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB); - break; - - case InstrType::LegalTerminator: - mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, - NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB); - // The instruction also acts as a terminator, so we have to record that - // in the string. - mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + assert(It != MBB.end() && "Should still have instructions?"); + // `It` is now positioned at the beginning of a range of instructions + // which may be outlinable. Check if each instruction is known to be safe. + for (; It != RangeEnd; ++It) { + // Keep track of where this instruction is in the module. + switch (TII.getOutliningType(It, Flags)) { + case InstrType::Illegal: + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); + break; + + case InstrType::Legal: + mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, + NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB); - break; - - case InstrType::Invisible: - // Normally this is set by mapTo(Blah)Unsigned, but we just want to - // skip this instruction. So, unset the flag here. - ++NumInvisible; - AddedIllegalLastTime = false; - break; + break; + + case InstrType::LegalTerminator: + mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange, + NumLegalInBlock, UnsignedVecForMBB, + InstrListForMBB); + // The instruction also acts as a terminator, so we have to record + // that in the string. + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); + break; + + case InstrType::Invisible: + // Normally this is set by mapTo(Blah)Unsigned, but we just want to + // skip this instruction. So, unset the flag here. + ++NumInvisible; + AddedIllegalLastTime = false; + break; + } } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 84469dd257cab0..d160a33b529e23 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6783,48 +6783,11 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Properties about candidate MBBs that hold for all of them. unsigned FlagsSetInAll = 0xF; - - // Compute liveness information for each candidate, and set FlagsSetInAll. std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); - // According to the AArch64 Procedure Call Standard, the following are - // undefined on entry/exit from a function call: - // - // * Registers x16, x17, (and thus w16, w17) - // * Condition codes (and thus the NZCV register) - // - // Because if this, we can't outline any sequence of instructions where - // one - // of these registers is live into/across it. Thus, we need to delete - // those - // candidates. - auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { - // If the unsafe registers in this block are all dead, then we don't need - // to compute liveness here. - if (C.Flags & UnsafeRegsDead) - return false; - return C.isAnyUnavailableAcrossOrOutOfSeq( - {AArch64::W16, AArch64::W17, AArch64::NZCV}, TRI); - }; - - // Are there any candidates where those registers are live? - if (!(FlagsSetInAll & UnsafeRegsDead)) { - // Erase every candidate that violates the restrictions above. (It could be - // true that we have viable candidates, so it's not worth bailing out in - // the case that, say, 1 out of 20 candidates violate the restructions.) - llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); - - // If the sequence doesn't have enough candidates left, then we're done. - if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); - } - - // At this point, we have only "safe" candidates to outline. Figure out - // frame + call instruction information. - unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); // Helper lambda which sets call information for every candidate. @@ -6952,6 +6915,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Check if we have to save LR. for (outliner::Candidate &C : RepeatedSequenceLocs) { + bool LRAvailable = + (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere) + ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) + : true; // If we have a noreturn caller, then we're going to be conservative and // say that we have to save LR. If we don't have a ret at the end of the // block, then we can't reason about liveness accurately. @@ -6962,7 +6929,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); // Is LR available? If so, we don't need a save. - if (C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) && !IsNoReturn) { + if (LRAvailable && !IsNoReturn) { NumBytesNoStackCalls += 4; C.setCallInfo(MachineOutlinerNoLRSave, 4); CandidatesWithoutStackFixups.push_back(C); @@ -7134,72 +7101,88 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( return true; } -bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, - unsigned &Flags) const { - if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags)) - return false; - // Check if LR is available through all of the MBB. If it's not, then set - // a flag. +SmallVector> +AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB, + unsigned &Flags) const { assert(MBB.getParent()->getRegInfo().tracksLiveness() && - "Suitable Machine Function for outlining must track liveness"); - LiveRegUnits LRU(getRegisterInfo()); + "Must track liveness!"); + SmallVector< + std::pair> + Ranges; - std::for_each(MBB.rbegin(), MBB.rend(), - [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + // The range [RangeBegin, RangeEnd). + MachineBasicBlock::instr_iterator RangeEnd = MBB.instr_end(); + MachineBasicBlock::instr_iterator RangeBegin = RangeEnd; + unsigned RangeLen = 0; - // Check if each of the unsafe registers are available... - bool W16AvailableInBlock = LRU.available(AArch64::W16); - bool W17AvailableInBlock = LRU.available(AArch64::W17); - bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV); + // According to the AArch64 Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Registers x16, x17, (and thus w16, w17) + // * Condition codes (and thus the NZCV register) + // + // If any of these registers are used inside or live across an outlined + // function, then they may be modified later, either by the compiler or + // some other tool (like the linker). + // + // To avoid outlining in these situations, partition each block into ranges + // where these registers are dead. We will only outline from those ranges. + LiveRegUnits LRU(getRegisterInfo()); + auto AreAllUnsafeRegsDead = [&LRU]() { + return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) && + LRU.available(AArch64::NZCV); + }; - // If all of these are dead (and not live out), we know we don't have to check - // them later. - if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock) - Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + // We need to know if LR is live across an outlining boundary later on in + // order to decide how we'll create the outlined call, frame, etc. + // + // It's pretty expensive to check this for *every candidate* within a block. + // That's some potentially n^2 behaviour, since in the worst case, we'd need + // to compute liveness from the end of the block for O(n) candidates within + // the block. + // + // So, to improve the average case, let's keep track of liveness from the end + // of the block to the beginning of *every outlinable range*. If we know that + // LR is available in every range we could outline from, then we know that + // we don't need to check liveness for any candidate within that range. + bool LRAvailableEverywhere = true; - // Now, add the live outs to the set. + // Compute liveness bottom-up. LRU.addLiveOuts(MBB); - - // If any of these registers is available in the MBB, but also a live out of - // the block, then we know outlining is unsafe. - if (W16AvailableInBlock && !LRU.available(AArch64::W16)) - return false; - if (W17AvailableInBlock && !LRU.available(AArch64::W17)) - return false; - if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV)) - return false; - - // Check if there's a call inside this MachineBasicBlock. If there is, then - // set a flag. - if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) - Flags |= MachineOutlinerMBBFlags::HasCalls; - - MachineFunction *MF = MBB.getParent(); - - // In the event that we outline, we may have to save LR. If there is an - // available register in the MBB, then we'll always save LR there. Check if - // this is true. - bool CanSaveLR = false; - const AArch64RegisterInfo *ARI = static_cast( - MF->getSubtarget().getRegisterInfo()); - - // Check if there is an available register across the sequence that we can - // use. - for (unsigned Reg : AArch64::GPR64RegClass) { - if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR && - Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) { - CanSaveLR = true; - break; + for (auto &MI : make_range(MBB.instr_rbegin(), MBB.instr_rend())) { + LRU.stepBackward(MI); + // If we are in a range where all of the unsafe registers are dead, then + // update the beginning of the range. Also try to precalculate some stuff + // for getOutliningCandidateInfo. + if (AreAllUnsafeRegsDead()) { + if (MI.isCall()) + Flags |= MachineOutlinerMBBFlags::HasCalls; + LRAvailableEverywhere &= LRU.available(AArch64::LR); + RangeBegin = MI.getIterator(); + ++RangeLen; + continue; } - } - - // Check if we have a register we can save LR to, and if LR was used - // somewhere. If both of those things are true, then we need to evaluate the - // safety of outlining stack instructions later. - if (!CanSaveLR && !LRU.available(AArch64::LR)) + // At least one unsafe register is not dead. We do not want to outline at + // this point. If it is long enough to outline from, save the range + // [RangeBegin, RangeEnd). + if (RangeLen > 1) + Ranges.push_back(std::make_pair(RangeBegin, RangeEnd)); + // Start a new range where RangeEnd is the first known unsafe point. + RangeLen = 0; + RangeBegin = MI.getIterator(); + RangeEnd = MI.getIterator(); + } + // Above loop misses the last (or only) range. + if (AreAllUnsafeRegsDead() && RangeLen > 1) + Ranges.push_back(std::make_pair(RangeBegin, RangeEnd)); + if (Ranges.empty()) + return Ranges; + // We found the ranges bottom-up. Mapping expects the top-down. Reverse + // the order. + std::reverse(Ranges.begin(), Ranges.end()); + if (!LRAvailableEverywhere) Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; - - return true; + return Ranges; } outliner::InstrType diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 55b1813f0b3017..677e1443191cdf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -280,10 +280,11 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool OutlineFromLinkOnceODRs) const override; outliner::OutlinedFunction getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; - outliner::InstrType - getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; - bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, - unsigned &Flags) const override; + outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const override; + SmallVector< + std::pair> + getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override; void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override; MachineBasicBlock::iterator