-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BasicBlockSections] Apply path cloning with -basic-block-sections. #68860
Conversation
@llvm/pr-subscribers-backend-x86 Author: Rahman Lavaee (rlavaee) Changes28b9126 introduced the path cloning format in the basic-block-sections profile. This PR validates and applies path clonings.
Applying a path cloning involves cloning all blocks in the path (except the first one) and setting up their branches. Full diff: https://github.com/llvm/llvm-project/pull/68860.diff 4 Files Affected:
diff --git a/.gitignore b/.gitignore
index 20c4f52cd37860e..bd4f8f12cc23916 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,4 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
+plo/
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 6e01dfd11ee6dad..e4cb7e5b6643d0a 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -25,6 +25,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
namespace llvm {
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 632fd68d88b5c64..7913f069b0f1d72 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -77,8 +77,11 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>
+#include <sstream>
using namespace llvm;
@@ -99,6 +102,33 @@ static cl::opt<bool> BBSectionsDetectSourceDrift(
namespace {
+MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock *MBB) {
+ auto &MF = *MBB->getParent();
+ auto TII = MF.getSubtarget().getInstrInfo();
+
+ auto CloneBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.push_back(CloneBB);
+ // Copy the instructions.
+ for (auto &I : MBB->instrs())
+ CloneBB->push_back(MF.CloneMachineInstr(&I));
+
+ // Add the successors of the original block as the new block's successors.
+ for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
+ CloneBB->copySuccessor(MBB, SI);
+
+ if (auto FT = MBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+ // The original block has an implicit fall through.
+ // Insert an explicit unconditional jump from the cloned block to the
+ // fallthrough block.
+ TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc());
+ }
+
+ for (auto &LiveIn : MBB->liveins())
+ CloneBB->addLiveIn(LiveIn);
+
+ return CloneBB;
+}
+
class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
@@ -285,6 +315,128 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
return false;
}
+// Returns `Error::success` if we can legally apply all clonings specified in
+// `ClonePaths`, and `Error` otherwise.
+static Error
+CheckValidCloning(const MachineFunction &MF,
+ const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+ // Map from the final BB IDs to the `MachineBasicBlock`s.
+ DenseMap<unsigned, const MachineBasicBlock *> BBIDToBlock;
+ for (auto &BB : MF)
+ BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+ for (auto &ClonePath : ClonePaths) {
+ const MachineBasicBlock *PrevBB = nullptr;
+ for (size_t I = 0; I < ClonePath.size(); ++I) {
+ unsigned BBID = ClonePath[I];
+ const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID);
+ if (!PathBB) {
+ return make_error<StringError>(Twine("no block with id ") +
+ Twine(BBID) + " in function " +
+ MF.getName(),
+ inconvertibleErrorCode());
+ }
+
+ if (PrevBB && !PrevBB->isSuccessor(PathBB)) {
+ return make_error<StringError>(
+ Twine("block #") + Twine(BBID) + " is not a successor of block #" +
+ Twine(*PrevBB->getBBID()) + " in function " + MF.getName(),
+ inconvertibleErrorCode());
+ }
+
+ if (I != ClonePath.size() - 1 && !PathBB->empty() &&
+ PathBB->back().isIndirectBranch()) {
+ return make_error<StringError>(
+ Twine("block #") + Twine(BBID) +
+ " has indirect branch can only appear as the last block of the "
+ "path, in function " +
+ MF.getName(),
+ inconvertibleErrorCode());
+ }
+ PrevBB = PathBB;
+ }
+ }
+ return Error::success();
+}
+
+// Applies all clonings specified in `ClonePaths` to `MF` and returns a map
+// from `ProfileBBID`s of all clone blocks to their BBIDs (assigned by
+// `MachineFunction`).
+static DenseMap<ProfileBBID, unsigned>
+ApplyCloning(MachineFunction &MF,
+ const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+ DenseMap<ProfileBBID, unsigned> CloneBBIDMap;
+ // Map from the final BB IDs to the `MachineBasicBlock`s.
+ DenseMap<unsigned, MachineBasicBlock *> BBIDToBlock;
+ for (auto &BB : MF)
+ BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+ DenseMap<unsigned, unsigned> NClonesForBBID;
+ auto TII = MF.getSubtarget().getInstrInfo();
+ for (const auto &ClonePath : ClonePaths) {
+
+ MachineBasicBlock *PrevBB = nullptr;
+ for (unsigned BBID : ClonePath) {
+ MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID);
+ if (PrevBB == nullptr) {
+ if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+ // Make fallthroughs explicit since we may change the layout.
+ TII->insertUnconditionalBranch(*OrigBB, FT,
+ OrigBB->findBranchDebugLoc());
+ }
+ PrevBB = OrigBB;
+ continue;
+ }
+
+ MachineBasicBlock *CloneBB = CloneMachineBasicBlock(OrigBB);
+
+ CloneBBIDMap.try_emplace({BBID, ++NClonesForBBID[BBID]},
+ *CloneBB->getBBID());
+ // Set up the previous block in the path to jump to the clone.
+ PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB);
+ PrevBB = CloneBB;
+ }
+ }
+ return CloneBBIDMap;
+}
+
+// Processes the raw input profile `PathAndClusterInfo` for the given function
+// `MF` and returns the final profile (cluster information). Returns error if
+// `PathAndClusterInfo` is invalid and cannot be applied (e.g., invalid cloning
+// paths).
+//
+// Steps:
+// 1. Clones basic blocks based on `PathAndClusterInfo.ClonePaths` (if not
+// empty) and
+// updates the CFG accordingly.
+// 2. Creates and returns the cluster profile for the final blocks (original
+// and cloned) based on `PathAndClusterInfo.ClusterInfo`.
+static Expected<DenseMap<unsigned, BBClusterInfo<unsigned>>>
+ProcessProfile(MachineFunction &MF,
+ const FunctionPathAndClusterInfo &PathAndClusterInfo) {
+ if (auto Err = CheckValidCloning(MF, PathAndClusterInfo.ClonePaths))
+ return std::move(Err);
+
+ // Apply the clonings and obtain the map from the input block ID of cloned
+ // blocks to their final BB IDs.
+ DenseMap<ProfileBBID, unsigned> CloneBBIDMap =
+ ApplyCloning(MF, PathAndClusterInfo.ClonePaths);
+
+ // Map from final BB IDs to their profile information.
+ DenseMap<unsigned, BBClusterInfo<unsigned>> ClusterInfoByBBID;
+ // This step creates all the necessary clones. It does not adjust the
+ // branches.
+ for (const BBClusterInfo<ProfileBBID> &P : PathAndClusterInfo.ClusterInfo) {
+ unsigned FinalBBID = P.BasicBlockID.CloneID == 0
+ ? P.BasicBlockID.BBID
+ : CloneBBIDMap.at(P.BasicBlockID);
+ ClusterInfoByBBID.try_emplace(
+ FinalBBID,
+ BBClusterInfo<unsigned>{FinalBBID, P.ClusterID, P.PositionInCluster});
+ }
+ return ClusterInfoByBBID;
+}
+
bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
auto BBSectionsType = MF.getTarget().getBBSectionsType();
assert(BBSectionsType != BasicBlockSection::None &&
@@ -315,17 +467,18 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
.getPathAndClusterInfoForFunction(MF.getName());
if (!HasProfile)
return true;
- for (const BBClusterInfo<ProfileBBID> &BBP :
- PathAndClusterInfo.ClusterInfo) {
- // TODO: Apply the path cloning profile.
- assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet");
- const auto [I, Inserted] = ClusterInfoByBBID.try_emplace(
- BBP.BasicBlockID.BBID,
- BBClusterInfo<unsigned>{BBP.BasicBlockID.BBID, BBP.ClusterID,
- BBP.PositionInCluster});
- (void)I;
- assert(Inserted && "Duplicate BBID found in profile");
+ auto ClusterInfoOrErr = ProcessProfile(MF, PathAndClusterInfo);
+ if (!ClusterInfoOrErr) {
+ auto Err =
+ handleErrors(ClusterInfoOrErr.takeError(), [&](const StringError &E) {
+ WithColor::warning()
+ << "Rejecting the BBSections profile for function "
+ << MF.getName() << " : " << E.getMessage() << "\n";
+ });
+ assert(!Err);
+ return true;
}
+ ClusterInfoByBBID = *std::move(ClusterInfoOrErr);
}
MF.setBBSectionsType(BBSectionsType);
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
new file mode 100644
index 000000000000000..0cca18b5aa8ad3a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
@@ -0,0 +1,169 @@
+;; Tests for path cloning with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test valid application of path cloning.
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f foo' >> %t1
+; RUN: echo 'p 0 3 5' >> %t1
+; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 | FileCheck %s --check-prefixes=PATH1,FOOSECTIONS
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f foo' >> %t2
+; RUN: echo 'p 0 3 5' >> %t2
+; RUN: echo 'p 1 3 4 5' >> %t2
+; RUN: echo 'c 0 3.1 5.1' >> %t2
+; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 | FileCheck %s --check-prefixes=PATH2,FOOSECTIONS
+
+;; Test failed application of path cloning.
+; RUN: echo 'v1' > %t3
+; RUN: echo 'f foo' >> %t3
+; RUN: echo 'p 0 2 3' >> %t3
+; RUN: echo 'c 0 2.1 3.1' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH3-WARN < %t3.err
+; RUN: echo 'v1' > %t4
+; RUN: echo 'f foo' >> %t4
+; RUN: echo 'p 0 1 100' >> %t4
+; RUN: echo 'c 0' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t4 2> %t4.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH4-WARN < %t4.err
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+ call void @effect(i32 0)
+ br i1 %a, label %b1, label %b3
+
+b1: ; preds = %b0
+ call void @effect(i32 1)
+ br i1 %b, label %b2, label %b3
+
+b2: ; preds = %b1
+ call void @effect(i32 2)
+ br label %b3
+
+b3: ; preds = %b0, %b1, %b2
+ call void @effect(i32 3)
+ br i1 %c, label %b4, label %b5
+
+b4: ; preds = %b3
+ call void @effect(i32 4)
+ br i1 %d, label %b5, label %cold
+
+b5: ; preds = %b3, %b4
+ call void @effect(i32 5)
+ ret void
+cold:
+ call void @effect(i32 6) ; preds = %b4
+ ret void
+}
+
+; PATH1: .section .text.foo,"ax",@progbits
+; PATH1-LABEL: foo:
+; PATH1: # %bb.0: # %b0
+; PATH1: jne .LBB0_1
+; PATH1-NEXT: # %bb.7: # %b3
+; PATH1: jne .LBB0_4
+; PATH1-NEXT: # %bb.8: # %b5
+; PATH1: retq
+; PATH1-NEXT: .LBB0_1: # %b1
+; PATH1: je .LBB0_3
+; PATH1-NEXT: # %bb.2: # %b2
+; PATH1: callq effect@PLT
+; PATH1-NEXT: .LBB0_3: # %b3
+; PATH1: je .LBB0_5
+; PATH1-NEXT: .LBB0_4: # %b4
+; PATH1: je foo.cold
+; PATH1-NEXT: .LBB0_5: # %b5
+; PATH1: retq
+
+; PATH2: .section .text.foo,"ax",@progbits
+; PATH2-LABEL: foo:
+; PATH2: # %bb.0: # %b0
+; PATH2: jne foo.__part.1
+; PATH2-NEXT: # %bb.7: # %b3
+; PATH2: jne .LBB0_4
+; PATH2-NEXT: # %bb.8: # %b5
+; PATH2: retq
+; PATH2: .section .text.foo,"ax",@progbits,unique,1
+; PATH2-NEXT: foo.__part.1: # %b1
+; PATH2: jne .LBB0_2
+; PATH2-NEXT: # %bb.9: # %b3
+; PATH2: je .LBB0_5
+; PATH2-NEXT: # %bb.10: # %b4
+; PATH2: je foo.cold
+; PATH2-NEXT: # %bb.11: # %b5
+; PATH2: retq
+; PATH2-NEXT: .LBB0_2: # %b2
+; PATH2: callq effect@PLT
+; PATH2-NEXT: # %bb.3: # %b3
+; PATH2: je .LBB0_5
+; PATH2-NEXT: .LBB0_4: # %b4
+; PATH2: je foo.cold
+; PATH2-NEXT: .LBB0_5: # %b5
+; PATH2: retq
+; FOOSECTIONS: foo.cold
+
+; PATH3-WARN: warning: Rejecting the BBSections profile for function foo : block #2 is not a successor of block #0 in function foo
+; PATH4-WARN: warning: Rejecting the BBSections profile for function foo : no block with id 100 in function foo
+; NOSECTIONS-NOT: foo.cold
+
+
+;; Test valid application of cloning for paths with indirect branches.
+; RUN: echo 'v1' > %t5
+; RUN: echo 'f bar' >> %t5
+; RUN: echo 'p 0 1' >> %t5
+; RUN: echo 'c 0 1.1 2 1' >> %t5
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t5 | FileCheck %s --check-prefix=PATH5
+; RUN: echo 'v1' > %t6
+; RUN: echo 'f bar' >> %t6
+; RUN: echo 'p 0 1 2' >> %t6
+; RUN: echo 'c 0 1.1 2.1 1' >> %t6
+
+;; Test failed application of path cloning for paths with indirect branches.
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t6 2> %t6.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t6.err
+; RUN: echo 'v1' > %t7
+; RUN: echo 'f bar' >> %t7
+; RUN: echo 'p 1 2' >> %t7
+; RUN: echo 'c 0 1 2.1' >> %t7
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t7 2> %t7.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t7.err
+
+
+define void @bar(i1 %a, i1 %b) {
+b0:
+ call void @effect(i32 0)
+ br i1 %a, label %b1, label %b2
+b1: ; preds = %b0
+ call void @effect(i32 1)
+ %0 = select i1 %b, ; <ptr> [#uses=1]
+ ptr blockaddress(@bar, %b2),
+ ptr blockaddress(@bar, %b3)
+ indirectbr ptr %0, [label %b2, label %b3]
+b2: ; preds = %b0, %b1
+ call void @effect(i32 2)
+ ret void
+b3:
+ call void @effect(i32 3) ; preds = %b1
+ ret void
+}
+
+
+; PATH5: .section .text.bar,"ax",@progbits
+; PATH5-LABEL: bar:
+; PATH5: # %bb.0: # %b0
+; PATH5: je .LBB1_2
+; PATH5-NEXT: # %bb.4: # %b1
+; PATH5: jmpq *%rax
+; PATH5-NEXT: .Ltmp0: # Block address taken
+; PATH5-NEXT: .LBB1_2: # %b2
+; PATH5: retq
+; PATH5-NEXT: # %bb.1: # %b1
+; PATH5: jmpq *%rax
+; PATH5: .section .text.split.bar,"ax",@progbits
+; PATH5: bar.cold: # %b3
+; NOSECTIONS-NOT: bar.cold
+
+; PATH-INDIR-WARN: warning: Rejecting the BBSections profile for function bar : block #1 has indirect branch can only appear as the last block of the path, in function bar
|
2b9d84a
to
ddf37b4
Compare
2fb694b
to
5a8dca6
Compare
Do you have some recent perf numbers for this kind of optimization? We've tried something similar internally in the past but didn't see any perf wins; i wonder if it makes sense for us to reconsider the optimization. |
We got 2% improvement on clang (over Propeller-optimized). For internal benchmarks, we got 0.4% improvement on Search before, but I will need to refresh that result. The analysis part of this is not yet released externally though. |
for (auto &I : MBB->instrs()) | ||
CloneBB->push_back(MF.CloneMachineInstr(&I)); | ||
|
||
// Add the successors of the original block as the new block's successors. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not predecessors here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The single predecessor for this block is set after returning from this call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, sound good.
@@ -99,6 +101,33 @@ static cl::opt<bool> BBSectionsDetectSourceDrift( | |||
|
|||
namespace { | |||
|
|||
MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock *MBB) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't this be part of IR itself, why not in BasicBlock.h?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to copy the machine-instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean CodeGen/MachineBasicBlock.h. This is a simple change so I am not insisting.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. MachineFunction.h is the proper place. I augmented MachineFunction::createMachineBasicBlock
so we can piggy-back on it.
; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t4 2> %t4.err | FileCheck %s --check-prefixes=FOONOCLONE,FOOSECTIONS | ||
; RUN: FileCheck %s --check-prefixes=PATH4-WARN < %t4.err | ||
|
||
define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment on which call instruction corresponds to which bbid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test will check the name of the blocks (emitted as comment after the block's label). Please let me know if you have other suggestions.
✅ With the latest revision this PR passed the C/C++ code formatter. |
75198b1
to
89af401
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did not end up doing a deep review. But added some nits :)
|
||
// Copy the instructions. | ||
for (auto &I : OrigBB.instrs()) | ||
CloneBB->push_back(MF.CloneMachineInstr(&I)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see a TargetInstrInfo::duplicate
API that appears to add functionality on top of MachineFunction::CloneMachineInstr
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like this only clones bundled instructions (https://llvm.org/doxygen/MachineFunction_8cpp_source.html#l00418), while I want all instructions cloned.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you should rather read this as it cannot clone instructions within a bundle, but apart from that you can probably consider every instruction as a bundle of one for this API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only Target with an interesting implementation (the default version just calls CloneMachineInstr
anyway), seems to be ARM here: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp#L1836 which seems to duplicate constant pool values so they can be kept close to the instruction and avoid the risk of ARM immediates being too small? So probably worth using the API?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. You meant using the duplicate
API instead of CloneMachineInstr
. That makes sense. I adopted the API but had to relax an assertion in its implementation to allow copying CFI instructions. (CFI instructions are marked non-duplicable because of Darwin:
// Non-duplicable things shouldn't be tail-duplicated. |
if (!PathBB) { | ||
WithColor::warning() << "no block with id " << BBID << " in function " | ||
<< MF.getName() << "\n"; | ||
return false; | ||
} | ||
|
||
if (PrevBB && !PrevBB->isSuccessor(PathBB)) { | ||
WithColor::warning() << "block #" << BBID | ||
<< " is not a successor of block #" | ||
<< PrevBB->getBBID()->BaseID << " in function " | ||
<< MF.getName() << "\n"; | ||
return false; | ||
} | ||
|
||
if (I != ClonePath.size() - 1 && !PathBB->empty() && | ||
PathBB->back().isIndirectBranch()) { | ||
WithColor::warning() | ||
<< "block #" << BBID | ||
<< " has indirect branch and appears as the non-tail block of a " | ||
"path in function " | ||
<< MF.getName() << "\n"; | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find it unusual to see "warnings" in the backend. We tend to go for a fail fast coding style with assert
s or report_fatal_error
s usually and don't try to continue when there is invalid conditions...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is more like a PGO hash mismatch warning. We proceed with applying other clonings even though one may fail. Do you suggest I use dbgs()
instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review @MatzeB
|
||
// Copy the instructions. | ||
for (auto &I : OrigBB.instrs()) | ||
CloneBB->push_back(MF.CloneMachineInstr(&I)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like this only clones bundled instructions (https://llvm.org/doxygen/MachineFunction_8cpp_source.html#l00418), while I want all instructions cloned.
if (!PathBB) { | ||
WithColor::warning() << "no block with id " << BBID << " in function " | ||
<< MF.getName() << "\n"; | ||
return false; | ||
} | ||
|
||
if (PrevBB && !PrevBB->isSuccessor(PathBB)) { | ||
WithColor::warning() << "block #" << BBID | ||
<< " is not a successor of block #" | ||
<< PrevBB->getBBID()->BaseID << " in function " | ||
<< MF.getName() << "\n"; | ||
return false; | ||
} | ||
|
||
if (I != ClonePath.size() - 1 && !PathBB->empty() && | ||
PathBB->back().isIndirectBranch()) { | ||
WithColor::warning() | ||
<< "block #" << BBID | ||
<< " has indirect branch and appears as the non-tail block of a " | ||
"path in function " | ||
<< MF.getName() << "\n"; | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is more like a PGO hash mismatch warning. We proceed with applying other clonings even though one may fail. Do you suggest I use dbgs()
instead?
7572fad
to
959b3cc
Compare
a14ce40
to
88577f0
Compare
rebase.
Also check for non-duplicable instructions in the block similar to TailDuplicator.cpp.
88577f0
to
f8499fb
Compare
@rlavaee could you please point to the code where these cloned paths are being implemented? There is likely a non-trivial tradeoff between eliminating branches/jumps and increasing the code size. I'm curious to see the algorithm |
Thanks for the interest. We will open-source the implementation under https://github.com/google/autofdo in a couple months. |
28b9126 introduced the path cloning format in the basic-block-sections profile.
This PR validates and applies path clonings.
A path cloning is valid if all of these conditions hold:
Applying a path cloning involves cloning all blocks in the path (except the first one) and setting up their branches.
Once all clonings are applied, the cluster information is used to guide block layout in the modified function.