From 2252da889aa82ec668199f5fa292dd1b0206e5ba Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Mon, 22 Sep 2025 11:01:37 -0700 Subject: [PATCH] [BOLT] Always treat function entry as code If an address has both a data marker ("$d") and a function symbol associated with it, treat it as code. --- bolt/lib/Rewrite/RewriteInstance.cpp | 13 ++++++++----- bolt/test/AArch64/data-at-0-offset.c | 17 ----------------- bolt/test/AArch64/function-data-marker.s | 23 +++++++++++++++++++++++ 3 files changed, 31 insertions(+), 22 deletions(-) delete mode 100644 bolt/test/AArch64/data-at-0-offset.c create mode 100644 bolt/test/AArch64/function-data-marker.s diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index c13a9f016e8ae..5b10dcc056669 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -917,9 +917,6 @@ void RewriteInstance::discoverFileObjects() { bool IsData = false; uint64_t LastAddr = 0; for (const auto &SymInfo : SortedSymbols) { - if (LastAddr == SymInfo.Address) // don't repeat markers - continue; - MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol); // Treat ST_Function as code. 
@@ -929,8 +926,14 @@ void RewriteInstance::discoverFileObjects() { if (IsData) { Expected<StringRef> NameOrError = SymInfo.Symbol.getName(); consumeError(NameOrError.takeError()); - BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError - << " lacks code marker\n"; + if (LastAddr == SymInfo.Address) { + BC->errs() << "BOLT-WARNING: ignoring data marker conflicting with " + "function symbol " + << *NameOrError << '\n'; + } else { + BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError + << " lacks code marker\n"; + } } MarkerType = MarkerSymType::CODE; } diff --git a/bolt/test/AArch64/data-at-0-offset.c b/bolt/test/AArch64/data-at-0-offset.c deleted file mode 100644 index 01248a637d393..0000000000000 --- a/bolt/test/AArch64/data-at-0-offset.c +++ /dev/null @@ -1,17 +0,0 @@ -// RUN: %clang %cflags -O2 -fPIE -std=gnu99 -Wl,-q -pie %s -o %t.exe -// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s -// CHECK-NOT: BOLT-WARNING: unable to disassemble instruction at offset - -void extra_space() { - asm volatile(".rept 256\n" - " .byte 0xff\n" - ".endr\n"); - return; -} - -int main(int argc, char **argv) { - void (*fn)(void); - fn = extra_space + 256; - fn(); - return 0; -} diff --git a/bolt/test/AArch64/function-data-marker.s b/bolt/test/AArch64/function-data-marker.s new file mode 100644 index 0000000000000..71b79acf0fc7f --- /dev/null +++ b/bolt/test/AArch64/function-data-marker.s @@ -0,0 +1,23 @@ +## Check that if a data marker is present at the start of a function, the +## underlying bytes are still treated as code. + +# RUN: %clang %cflags %s -o %t.exe +# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg 2>&1 | FileCheck %s + +# CHECK: BOLT-WARNING: ignoring data marker conflicting with function symbol _start + +.text +.balign 4 + +## Data marker is emitted because ".long" directive is used instead of ".inst". 
+.global _start +.type _start, %function +_start: + .long 0xcec08000 // sha512su0 v0.2d, v0.2d + ret +.size _start, .-_start + +# CHECK-LABEL: Binary Function "_start" +# CHECK: Entry Point +# CHECK-NEXT: sha512su0 v0.2d, v0.2d +