Skip to content

Commit

Permalink
Support repeated machine outlining
Browse files Browse the repository at this point in the history
Summary: This change allows machine outlining to be applied N times, where N is specified by a compiler option. By default, N is 1. The motivation is that repeated machine outlining can further reduce code size. Please refer to the presentation "Improving Swift Binary Size via Link Time Optimization" from the 2019 LLVM Developers' Meeting.

Reviewers: aschwaighofer, tellenbach, paquette

Reviewed By: paquette

Subscribers: tellenbach, hiraditya, llvm-commits, jinlin

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71027
  • Loading branch information
jinlin-bayarea committed Mar 17, 2020
1 parent 08ab8c9 commit 1f93b16
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 3 deletions.
47 changes: 44 additions & 3 deletions llvm/lib/CodeGen/MachineOutliner.cpp
Expand Up @@ -97,6 +97,13 @@ static cl::opt<bool> EnableLinkOnceODROutlining(
cl::desc("Enable the machine outliner on linkonceodr functions"),
cl::init(false));

// Number of times the machine outliner is applied to the module, set with the
// hidden -machine-outline-runs option.
// Defaults to 1, but more repetitions can save additional size.
// A value below 1 is rejected with a fatal error in
// MachineOutliner::runOnModule.
static cl::opt<unsigned>
NumRepeat("machine-outline-runs", cl::Hidden,
cl::desc("The number of times to apply machine outlining"),
cl::init(1));

namespace {

/// Represents an undefined index in the suffix tree.
Expand Down Expand Up @@ -842,6 +849,9 @@ struct MachineOutliner : public ModulePass {
/// linkonceodr linkage.
bool OutlineFromLinkOnceODRs = false;

/// Zero-based index of the outlining run currently in progress. Assigned by
/// runOnceOnModule() from its \p Iter argument and read by
/// createOutlinedFunction(), which adds a run-number component to the
/// outlined function's name for every run after the first.
unsigned OutlineRepeatedNum = 0;

/// Set to true if the outliner should run on all functions in the module
/// considered safe for outlining.
/// Set to true by default for compatibility with llc's -run-pass option.
Expand Down Expand Up @@ -900,9 +910,12 @@ struct MachineOutliner : public ModulePass {
InstructionMapper &Mapper,
unsigned Name);

/// Calls 'doOutline()'.
/// Calls runOnceOnModule() up to NumRepeat times, stopping early as soon as a
/// run returns false (i.e. no changes were found). Returns true if at least
/// one run returned true.
bool runOnModule(Module &M) override;

/// Performs a single outlining run over \p M; calls 'doOutline()'.
/// \p Iter is the zero-based index of this run and is stored in
/// OutlineRepeatedNum. Returns false without doing anything when \p M is
/// empty.
bool runOnceOnModule(Module &M, unsigned Iter);

/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
Expand Down Expand Up @@ -1099,7 +1112,13 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
// Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
// regardless of changes to the outliner's cost model/traversal order.
std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
std::string FunctionName;
if (OutlineRepeatedNum > 0)
FunctionName = ("OUTLINED_FUNCTION_" + Twine(OutlineRepeatedNum + 1) + "_" +
Twine(Name))
.str();
else
FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();

// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
Expand Down Expand Up @@ -1438,12 +1457,14 @@ void MachineOutliner::emitInstrCountChangedRemark(
}
}

bool MachineOutliner::runOnModule(Module &M) {
bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) {
// Check if there's anything in the module. If it's empty, then there's
// nothing to outline.
if (M.empty())
return false;

OutlineRepeatedNum = Iter;

// Number to append to the current outlined function.
unsigned OutlinedFunctionNum = 0;

Expand Down Expand Up @@ -1507,3 +1528,23 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {

return OutlinedSomething;
}

// Pass entry point: run the outliner over M repeatedly, at most NumRepeat
// times, giving up early once a run no longer changes anything. The result is
// true when at least one run modified the module.
bool MachineOutliner::runOnModule(Module &M) {
  const unsigned MaxRuns = NumRepeat;

  // Zero runs is a usage error, not a request to skip the pass.
  if (MaxRuns == 0)
    report_fatal_error("Expect NumRepeat for machine outlining "
                       "to be greater than or equal to 1!\n");

  // CompletedRuns counts the runs that reported a change; it doubles as the
  // zero-based index handed to the next run.
  unsigned CompletedRuns = 0;
  while (CompletedRuns != MaxRuns && runOnceOnModule(M, CompletedRuns))
    ++CompletedRuns;

  if (CompletedRuns == MaxRuns) {
    // Every permitted run made progress; we stopped only because of the cap.
    LLVM_DEBUG(dbgs() << "Stopped outlining because iteration is "
                         "equal to "
                      << MaxRuns << "\n");
  } else {
    // The run with index CompletedRuns found nothing left to outline.
    LLVM_DEBUG(dbgs() << "Stopped outlining at iteration " << CompletedRuns
                      << " because no changes were found.\n");
  }

  return CompletedRuns != 0;
}
148 changes: 148 additions & 0 deletions llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir
@@ -0,0 +1,148 @@
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner -machine-outline-runs=4 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS

# Example of Repeated Instruction Sequence - Iterative Machine Outlining
#
#; define void @"$s12"(...) { define i64 @"$s5” (...) { define void @"$s13"(...) {
# ... ... ...
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
# %9 = load i4, i4*, %6 %9 = load i4, i4*, %6 %9 = load i4, i4*, %6
# store i4 %9, i4* %5 store i4 %9, i4* %5 store i4 %9, i4* %5
# ... ... ...
# } } }
#
# After machine outliner (1st time)
#
# define void @"$s12"(...) { define i64 @"$s5” (...) { define void @"$s13"(...) {
# ... ... ...
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
# call void @outlined_function_1_1 call void @outlined_function_1_1 call void @outlined_function_1_1
# ... ... ...
# } } }
#
# After machine outliner (2nd time)
#
# define void @"$s12"(...) { define i64 @"$s5” (...) { define void @"$s13"(...) {
# ... ... ...
# call void @outlined_function_2_1 call void @outlined_function_1_1 call void @outlined_function_2_1
# ... ... ...
# } } }
#
# Check whether the machine outliner can find further outlining opportunities after
# machine outlining has already been performed.
#
--- |
target triple = "aarch64-apple-darwin"

declare void @foo() local_unnamed_addr

declare void @widget() local_unnamed_addr

; Function Attrs: minsize noredzone optsize
define void @baz.14() #0 {
ret void
}

; Function Attrs: minsize noredzone optsize
define void @baz.15() #0 {
ret void
}

; Function Attrs: minsize noredzone optsize
define void @baz.16() #0 {
ret void
}

attributes #0 = { minsize noredzone optsize }
...
---
name: baz.14
tracksRegLiveness: true
stack:
- { id: 0, offset: -8, size: 8 }
- { id: 1, offset: -16, size: 8 }
body: |
bb.0:
liveins: $x0, $x19, $lr
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $w19, -8
frame-setup CFI_INSTRUCTION offset $w30, -16
renamable $x19 = COPY $x0
renamable $x0 = nuw ADDXri $x0, 48, 0
$x1 = ADDXri $sp, 0, 0
dead $w2 = MOVi32imm 33, implicit-def $x2
$x3 = COPY $xzr
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
$x0 = COPY killed renamable $x19
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
RET_ReallyLR
...
---
name: baz.15
stack:
- { id: 0, offset: -8, size: 8 }
- { id: 1, offset: -16, size: 8 }
body: |
bb.0:
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $w19, -8
frame-setup CFI_INSTRUCTION offset $w30, -16
renamable $x19 = COPY $x0
renamable $x0 = nuw ADDXri killed renamable $x1, 16, 0
$x1 = ADDXri $sp, 0, 0
dead $w2 = MOVi32imm 33, implicit-def $x2
$x3 = COPY $xzr
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
$x0 = COPY killed renamable $x19
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
RET_ReallyLR
...
---
name: baz.16
tracksRegLiveness: true
stack:
- { id: 0, offset: -8, size: 8 }
- { id: 1, offset: -16, size: 8 }
body: |
bb.0:
liveins: $x0, $x19, $lr
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $w19, -8
frame-setup CFI_INSTRUCTION offset $w30, -16
renamable $x19 = COPY $x0
renamable $x0 = nuw ADDXri $x0, 48, 0
$x1 = ADDXri $sp, 0, 0
dead $w2 = MOVi32imm 33, implicit-def $x2
$x3 = COPY $xzr
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
$x0 = COPY killed renamable $x19
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
RET_ReallyLR
...

# TWO-RUNS: name: OUTLINED_FUNCTION_2_0
# TWO-RUNS-DAG: bb.0:
# TWO-RUNS-DAG: renamable $x19 = COPY $x0
# TWO-RUNS-NEXT: renamable $x0 = nuw ADDXri $x0, 48, 0
# TWO-RUNS-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp
#
# The machine outliner is expected to stop at the 1st iteration for case ONE-RUN
# since machine-outline-runs is specified as 1.
# ONE-RUN-NOT: [[OUTLINED:OUTLINED_FUNCTION_2_[0-9]+]]
#
# The machine outliner is expected to stop at the 3rd iteration for case FOUR-RUNS
# since the MIR has no change at the 3rd iteration.
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_3_[0-9]+]]
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_4_[0-9]+]]

0 comments on commit 1f93b16

Please sign in to comment.