From 3a503ce66318ed65d071f6401af5750640d33444 Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Tue, 3 Mar 2020 16:54:23 +0800 Subject: [PATCH] [X86] Reduce the number of emitted fragments due to branch align Summary: Currently, a BoundaryAlign fragment may be inserted after the branch that needs to be aligned to truncate the current fragment, this fragment is unused at most of time. To avoid that, we can insert a new empty Data fragment instead. Non-relaxable instruction is usually emitted into Data fragment, so the inserted empty Data fragment will be reused at a high possibility. Reviewers: annita.zhang, reames, MaskRay, craig.topper, LuoYuanke, jyknight Reviewed By: reames, LuoYuanke Subscribers: llvm-commits, dexonsmith, hiraditya Tags: #llvm Differential Revision: https://reviews.llvm.org/D75438 --- llvm/include/llvm/MC/MCFragment.h | 24 ++--- llvm/lib/MC/MCAssembler.cpp | 21 ++--- llvm/lib/MC/MCFragment.cpp | 7 +- llvm/lib/MC/MCObjectStreamer.cpp | 9 -- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 94 ++++++++++--------- llvm/test/MC/X86/align-branch-64-negative.s | 26 +---- 6 files changed, 71 insertions(+), 110 deletions(-) diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index e052098611a99..bde0835b6a553 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -523,20 +523,16 @@ class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> { class MCBoundaryAlignFragment : public MCFragment { /// The alignment requirement of the branch to be aligned. Align AlignBoundary; - /// Flag to indicate whether the branch is fused. Use in determining the - /// region of fragments being aligned. - bool Fused : 1; - /// Flag to indicate whether NOPs should be emitted. - bool EmitNops : 1; + /// The last fragment in the set of fragments to be aligned. + const MCFragment *LastFragment = nullptr; /// The size of the fragment. The size is lazily set during relaxation, and /// is not meaningful before that. uint64_t Size = 0; public: - MCBoundaryAlignFragment(Align AlignBoundary = Align(1), bool Fused = false, - bool EmitNops = false, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), - Fused(Fused), EmitNops(EmitNops) {} + MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) { + } uint64_t getSize() const { return Size; } void setSize(uint64_t Value) { Size = Value; } @@ -544,11 +540,11 @@ class MCBoundaryAlignFragment : public MCFragment { Align getAlignment() const { return AlignBoundary; } void setAlignment(Align Value) { AlignBoundary = Value; } - bool isFused() const { return Fused; } - void setFused(bool Value) { Fused = Value; } - - bool canEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } + const MCFragment *getLastFragment() const { return LastFragment; } + void setLastFragment(const MCFragment *F) { + assert(!F || getParent() == F->getParent()); + LastFragment = F; + } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index b32c9b5fdfacb..83f0d62bac2ad 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -996,27 +996,22 @@ static bool needPadding(uint64_t StartAddr, uint64_t Size, bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { - // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. - if (!BF.canEmitNops()) + // BoundaryAlignFragment that doesn't need to align any fragment should not be + // relaxed. + if (!BF.getLastFragment()) return false; - uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); + uint64_t AlignedOffset = Layout.getFragmentOffset(&BF); uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it is - // emitted into two fragments at most, the next MCBoundaryAlignFragment(if - // exists) also marks the end of the branch. - for (auto i = 0, N = BF.isFused() ? 2 : 1; - i != N && !isa(F); ++i, F = F->getNextNode()) { + for (const MCFragment *F = BF.getLastFragment(); F != &BF; + F = F->getPrevNode()) AlignedSize += computeFragmentSize(Layout, *F); - } - uint64_t OldSize = BF.getSize(); - AlignedOffset -= OldSize; + Align BoundaryAlignment = BF.getAlignment(); uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) ? offsetToAlignment(AlignedOffset, BoundaryAlignment) : 0U; - if (NewSize == OldSize) + if (NewSize == BF.getSize()) return false; BF.setSize(NewSize); Layout.invalidateFragmentsFrom(&BF); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index a96b8e86aed3c..79e675c59c6b7 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -424,14 +424,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { } case MCFragment::FT_BoundaryAlign: { const auto *BF = cast(this); - if (BF->canEmitNops()) - OS << " (can emit nops to align"; - if (BF->isFused()) - OS << " fused branch)"; - else - OS << " unfused branch)"; OS << "\n "; OS << " BoundarySize:" << BF->getAlignment().value() + << " LastFragment:" << BF->getLastFragment() << " Size:" << BF->getSize(); break; } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 33fcc20cb7b75..a36cdc4c1abba 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -215,15 +215,6 @@ MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) { return F; } -MCBoundaryAlignFragment *MCObjectStreamer::getOrCreateBoundaryAlignFragment() { - auto *F = dyn_cast_or_null(getCurrentFragment()); - if (!F || F->canEmitNops()) { - F = new MCBoundaryAlignFragment(); - insert(F); - } - return F; -} - void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { Assembler->registerSymbol(Sym); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 426ddb7a29b54..2f84d23d6db5c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -133,6 +133,7 @@ class X86AsmBackend : public MCAsmBackend { bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; MCInst PrevInst; + MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -493,27 +494,30 @@ bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { (AlignBranchType & X86::AlignBranchIndirect)); } -/// Insert MCBoundaryAlignFragment before instructions to align branches. +/// Insert BoundaryAlignFragment before instructions to align branches. void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - MCFragment *CF = OS.getCurrentFragment(); - bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (hasInterruptDelaySlot(PrevInst)) { + if (hasInterruptDelaySlot(PrevInst)) // If this instruction follows an interrupt enabling instruction with a one // instruction delay, inserting a nop would change behavior. - } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + return; + + if (!isMacroFused(PrevInst, Inst)) + // Macro fusion doesn't happen indeed, clear the pending. + PendingBoundaryAlign = nullptr; + + if (PendingBoundaryAlign && + OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) { // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null(CF->getPrevNode())) { - const_cast(PF)->setEmitNops(true); - const_cast(PF)->setFused(true); - } - } else if (needAlignInst(Inst)) { + // after the previous instruction. + // + // Do nothing here since we already inserted a BoudaryAlign fragment when + // we met the first instruction in the fused pair and we'll tie them + // together in alignBranchesEnd. + // // Note: When there is at least one fragment, such as MCAlignFragment, // inserted after the previous instruction, e.g. // @@ -525,36 +529,38 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, // // We will treat the JCC as a unfused branch although it may be fused // with the CMP. - auto *F = OS.getOrCreateBoundaryAlignFragment(); - F->setAlignment(AlignBoundary); - F->setEmitNops(true); - F->setFused(false); - } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary. - auto *F = OS.getOrCreateBoundaryAlignFragment(); - F->setAlignment(AlignBoundary); + return; } - PrevInst = Inst; + if (needAlignInst(Inst) || ((AlignBranchType & X86::AlignBranchFused) && + isFirstMacroFusibleInst(Inst, *MCII))) { + // If we meet a unfused branch or the first instuction in a fusiable pair, + // insert a BoundaryAlign fragment. + OS.insert(PendingBoundaryAlign = + new MCBoundaryAlignFragment(AlignBoundary)); + } } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. +/// Set the last fragment to be aligned for the BoundaryAlignFragment. void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + PrevInst = Inst; + + if (!needAlignInst(Inst) || !PendingBoundaryAlign) + return; + + // Tie the aligned instructions into a a pending BoundaryAlign. + PendingBoundaryAlign->setLastFragment(OS.getCurrentFragment()); + PendingBoundaryAlign = nullptr; + + // We need to ensure that further data isn't added to the current + // DataFragment, so that we can get the size of instructions later in + // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty + // DataFragment. + if (isa_and_nonnull(OS.getCurrentFragment())) + OS.insert(new MCDataFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); @@ -862,16 +868,12 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm, // If we're looking at a boundary align, make sure we don't try to pad // its target instructions for some following directive. Doing so would // break the alignment of the current boundary align. - if (F.getKind() == MCFragment::FT_BoundaryAlign) { - auto &BF = cast(F); - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise - // it is emitted into two fragments at most, the next - // MCBoundaryAlignFragment(if exists) also marks the end of the branch. - for (int i = 0, N = BF.isFused() ? 2 : 1; - i != N && !isa(F); - ++i, F = F->getNextNode(), I++) { - } + if (auto *BF = dyn_cast(&F)) { + const MCFragment *LastFragment = BF->getLastFragment(); + if (!LastFragment) + continue; + while (&*I != LastFragment) + ++I; } } } diff --git a/llvm/test/MC/X86/align-branch-64-negative.s b/llvm/test/MC/X86/align-branch-64-negative.s index 54e9e70561e5f..87c465ca7e5aa 100644 --- a/llvm/test/MC/X86/align-branch-64-negative.s +++ b/llvm/test/MC/X86/align-branch-64-negative.s @@ -24,31 +24,13 @@ labeled_call_test1: label_before: callq bar - # In the second test, we have a label which is expected to be bound to the - # end of the call. For instance, we want the to associate some data with - # the return address of the call. - # CHECK-LABEL: : - # CHECK: 5a: callq - # CHECK: 5f: nop - # CHECK: 60 : - # CHECK: 60: jmp - .globl labeled_call_test2 - .p2align 5 -labeled_call_test2: - .rept 26 - int3 - .endr - callq bar -label_after: - jmp bar - - # Our third test is like the first w/a labeled fault, but specifically to + # Our second test is like the first w/a labeled fault, but specifically to # a fused memory comparison. This is the form produced by implicit null # checks for instance. # CHECK-LABEL: : - # CHECK: 9f : - # CHECK: 9f: nop - # CHECK: a0: cmpq + # CHECK: 5f : + # CHECK: 5f: nop + # CHECK: 60: cmpq .globl implicit_null_check .p2align 5 implicit_null_check: