Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: This patch introduces LoopInversionPass. Its main purpose is to ensure that the loop layout is optimal according to the profile information: if the profile shows that the loop is taken, the unconditional jump instruction should be executed only once (on loop entry), and vice versa. Please take a look at the pass header file and the test for more details. Also change the link_fdata script slightly so that the FDATA prefix can be changed, like FileCheck does. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei PR facebookarchive/BOLT#153 (cherry picked from FBD28391811)
- Loading branch information
Showing
7 changed files
with
234 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
//===--------- Passes/LoopInversionPass.cpp -------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "LoopInversionPass.h" | ||
#include "ParallelUtilities.h" | ||
|
||
using namespace llvm; | ||
|
||
// Command-line options consulted by LoopInversionPass. | ||
namespace opts { | ||
extern cl::OptionCategory BoltCategory; | ||
|
||
// Block-reordering mode, declared extern here; LoopInversionPass::runOnFunctions | ||
// returns without doing anything when it equals LT_NONE. | ||
extern cl::opt<bolt::ReorderBasicBlocks::LayoutType> ReorderBlocks; | ||
|
||
// The "loop-inversion-opt" switch. It is initialised to true, registered in | ||
// BoltCategory and marked cl::ReallyHidden; runOnFunctions skips the pass | ||
// when it is false. | ||
static cl::opt<bool> LoopReorder( | ||
"loop-inversion-opt", | ||
cl::desc("reorder unconditional jump instructions in loops optimization"), | ||
cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden); | ||
} // namespace opts | ||
|
||
namespace llvm { | ||
namespace bolt { | ||
|
||
/// Reorder single-block loop bodies and their loop headers in the layout of
/// \p BF so that the order agrees with the profile.
///
/// A candidate pair consists of a block BB whose only predecessor and only
/// successor are the same block (the loop header), where the header has
/// exactly two successors, neither block sits at layout index 0, and both
/// blocks report the same isCold() state.
///
/// For such a pair the loop body should precede the header when the
/// header->body edge count is at least the header->exit edge count, and follow
/// it otherwise. The two blocks exchange layout positions only when the
/// current order disagrees with that, so running the pass on an already
/// optimal layout leaves it untouched.
///
/// Functions with fewer than three blocks in the layout or without a valid
/// profile are left alone.
///
/// \returns true if the layout of \p BF was changed.
bool LoopInversionPass::runOnFunction(BinaryFunction &BF) {
  if (BF.layout_size() < 3 || !BF.hasValidProfile())
    return false;

  bool IsChanged = false;
  BF.updateLayoutIndices();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->succ_size() != 1 || BB->pred_size() != 1)
      continue;

    BinaryBasicBlock *SuccBB = *BB->succ_begin();
    BinaryBasicBlock *PredBB = *BB->pred_begin();
    const unsigned BBIndex = BB->getLayoutIndex();
    const unsigned SuccBBIndex = SuccBB->getLayoutIndex();
    if (SuccBB != PredBB || BB == SuccBB || BBIndex == 0 || SuccBBIndex == 0 ||
        SuccBB->succ_size() != 2 || BB->isCold() != SuccBB->isCold())
      continue;

    // Find the header's other successor, i.e. the edge that leaves the loop.
    BinaryBasicBlock *SecondSucc = nullptr;
    for (BinaryBasicBlock *Succ : SuccBB->successors()) {
      if (Succ != BB) {
        SecondSucc = Succ;
        break;
      }
    }

    assert(SecondSucc != nullptr && "Unable to find second BB successor");
    const uint64_t BBCount = SuccBB->getBranchInfo(*BB).Count;
    const uint64_t OtherCount = SuccBB->getBranchInfo(*SecondSucc).Count;

    // Swap only when the current order contradicts the profile. A body that
    // is at least as hot as the exit edge belongs before the header; a colder
    // body belongs after it. If that already holds, swapping would undo an
    // optimal layout.
    const bool BodyIsHot = BBCount >= OtherCount;
    const bool BodyIsFirst = BBIndex < SuccBBIndex;
    if (BodyIsHot == BodyIsFirst)
      continue;

    IsChanged = true;
    BB->setLayoutIndex(SuccBBIndex);
    SuccBB->setLayoutIndex(BBIndex);
  }

  if (IsChanged) {
    // Materialise the swapped indices as a new block order.
    BinaryFunction::BasicBlockOrderType NewOrder = BF.getLayout();
    std::sort(NewOrder.begin(), NewOrder.end(),
              [&](BinaryBasicBlock *BB1, BinaryBasicBlock *BB2) {
                return BB1->getLayoutIndex() < BB2->getLayoutIndex();
              });
    BF.updateBasicBlockLayout(NewOrder);
  }

  return IsChanged;
}
|
||
/// Pass entry point: apply runOnFunction to every function of \p BC for which
/// shouldOptimize() holds and report how many functions were modified.
///
/// Nothing is done (and nothing is printed) when block reordering is set to
/// LT_NONE or when the loop-inversion-opt option is false.
void LoopInversionPass::runOnFunctions(BinaryContext &BC) {
  const bool ReorderingDisabled =
      opts::ReorderBlocks == ReorderBasicBlocks::LT_NONE;
  if (ReorderingDisabled || !opts::LoopReorder)
    return;

  // Incremented from the worker callback, hence atomic.
  std::atomic<uint64_t> NumModified{0};

  ParallelUtilities::PredicateTy ShouldSkip = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  ParallelUtilities::WorkFuncTy ProcessFunction = [&](BinaryFunction &BF) {
    const bool Modified = runOnFunction(BF);
    if (Modified)
      ++NumModified;
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, ProcessFunction,
      ShouldSkip, "LoopInversionPass");

  outs() << "BOLT-INFO: " << NumModified.load()
         << " Functions were reordered by LoopInversionPass\n";
}
|
||
} // end namespace bolt | ||
} // end namespace llvm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
//===--------- Passes/LoopInversionPass.h ---------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_LOOPINVERSION_H | ||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_LOOPINVERSION_H | ||
|
||
#include "BinaryPasses.h" | ||
|
||
// This pass finds cases where BBs have the following layout: | ||
// #BB0: | ||
// .... | ||
// #BB1: | ||
// cmp | ||
// cond_jmp #BB3 | ||
// #BB2: | ||
// <loop body> | ||
// jmp #BB1 | ||
// #BB3: | ||
// <loop exit> | ||
// | ||
// And swaps BB1 and BB2: | ||
// #BB0: | ||
// .... | ||
// jmp #BB1 | ||
// #BB2: | ||
// <loop body> | ||
// #BB1: | ||
// cmp | ||
// cond_njmp #BB2 | ||
// #BB3: | ||
// <loop exit> | ||
// | ||
// And vice versa depending on the profile information. | ||
// The advantage is that each loop iteration executes only one conditional | ||
// jump; the unconditional jump is executed only once, on loop entry. | ||
|
||
namespace llvm { | ||
namespace bolt { | ||
|
||
class LoopInversionPass : public BinaryFunctionPass { | ||
public: | ||
explicit LoopInversionPass() : BinaryFunctionPass(false) {} | ||
|
||
const char *getName() const override { return "loop-inversion-opt"; } | ||
|
||
/// Pass entry point | ||
void runOnFunctions(BinaryContext &BC) override; | ||
bool runOnFunction(BinaryFunction &Function); | ||
}; | ||
|
||
} // namespace bolt | ||
} // namespace llvm | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# REQUIRES: system-linux | ||
|
||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ | ||
# RUN: %s -o %t.o | ||
# RUN: link_fdata %s %t.o %t.fdata | ||
# RUN: link_fdata %s %t.o %t.fdata2 "FDATA2" | ||
# RUN: %host_cc %cflags %t.o -o %t.exe -Wl,-q | ||
# RUN: llvm-bolt %t.exe -data %t.fdata -reorder-blocks=cache+ -print-finalized \ | ||
# RUN: -loop-inversion-opt -o %t.out | FileCheck %s | ||
# RUN: llvm-bolt %t.exe -data %t.fdata2 -reorder-blocks=cache+ -print-finalized \ | ||
# RUN: -loop-inversion-opt -o %t.out2 | FileCheck --check-prefix="CHECK2" %s | ||
|
||
# The case where loop is used: | ||
# FDATA: 1 main 2 1 main #.J1# 0 420 | ||
# FDATA: 1 main b 1 main #.Jloop# 0 420 | ||
# FDATA: 1 main b 1 main d 0 1 | ||
# CHECK: BB Layout : .LBB00, .Ltmp0, .Ltmp1, .LFT0 | ||
|
||
# The case where loop is unused: | ||
# FDATA2: 1 main 2 1 main #.J1# 0 420 | ||
# FDATA2: 1 main b 1 main #.Jloop# 0 1 | ||
# FDATA2: 1 main b 1 main d 0 420 | ||
# CHECK2: BB Layout : .LBB00, .Ltmp1, .LFT0, .Ltmp0 | ||
|
||
# Test input: a counted loop whose body (.Jloop) is placed before its | ||
# condition check (.J1); the entry code jumps straight to the check. | ||
.text | ||
.globl main | ||
.type main, %function | ||
.size main, .Lend-main | ||
main: | ||
# Zero the counter, then enter the loop at its condition check. | ||
xor %eax, %eax | ||
jmp .J1 | ||
# Loop body: increment the counter and fall through to the check. | ||
.Jloop: | ||
inc %rax | ||
# Loop condition: branch back to the body while the counter is below 16 | ||
# (signed compare); otherwise fall through to the return. | ||
.J1: | ||
cmp $16, %rax | ||
jl .Jloop | ||
retq | ||
.Lend: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters