diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 9b28c7efde5bf..900b787e3e106 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -472,6 +472,27 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
         }
       }
 
+      // AArch64 BTI:
+      // If the callee has an indirect tailcall (BR), inlining would transform
+      // it into an indirect call (BLR). The BTI at the target of the tailcall
+      // would then have to be updated, but these targets are not known.
+      // Instead, skip inlining callees that contain an indirect tailcall.
+      auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
+        for (const BinaryBasicBlock &BB : BF)
+          for (const MCInst &Inst : BB)
+            if (BC.MIB->isIndirectBranch(Inst) && BC.MIB->isTailCall(Inst))
+              return true;
+        return false;
+      };
+
+      // The target/feature predicates are tested first so that the callee is
+      // only scanned when the binary is AArch64 and uses BTI.
+      if (BC.isAArch64() && BC.usesBTI() &&
+          HasIndirectTailCall(*TargetFunction)) {
+        ++InstIt;
+        continue;
+      }
+
       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                         << " in " << Function << " : " << BB->getName()
                         << ". Count: " << BB->getKnownExecutionCount()
diff --git a/bolt/test/AArch64/inline-bti.s b/bolt/test/AArch64/inline-bti.s
new file mode 100644
index 0000000000000..62f6ea6f4b63a
--- /dev/null
+++ b/bolt/test/AArch64/inline-bti.s
@@ -0,0 +1,39 @@
+## This test checks that for AArch64 binaries with BTI, we do not inline
+## functions that contain indirect tailcalls.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt | FileCheck %s
+
+# For BTI, we should not inline foo.
+# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
+
+  .text
+  .globl _Z3fooP1A
+  .type _Z3fooP1A,@function
+_Z3fooP1A:
+  ldr x8, [x0]
+  ldr w0, [x8]
+  br x30 // Indirect branch (BR): the reason foo must not be inlined with BTI.
+  .size _Z3fooP1A, .-_Z3fooP1A
+
+  .globl _Z3barP1A
+  .type _Z3barP1A,@function
+_Z3barP1A:
+  stp x29, x30, [sp, #-16]!
+  mov x29, sp
+  bl _Z3fooP1A // Call site of foo that the CHECK-NOT expects to stay un-inlined.
+  mul w0, w0, w0
+  ldp x29, x30, [sp], #16
+  ret
+  .size _Z3barP1A, .-_Z3barP1A
+
+  .globl main
+  .p2align 2
+  .type main,@function
+main:
+  mov w0, wzr // w0 = 0
+  ret
+  .size main, .-main