From 36823a0c80776a97b6579baeadbee0e762377720 Mon Sep 17 00:00:00 2001 From: Ash Dobrescu Date: Mon, 22 Sep 2025 16:14:39 +0000 Subject: [PATCH 1/4] [BOLT] Check if symbol is in data area of function There are cases in which `getEntryIDForSymbol` is called, where the given Symbol is in a constant island, and so BOLT can not find its function. This causes BOLT to reach `llvm_unreachable("symbol not found")` and crash. This patch adds a check that avoid this crash and gives a warning to the user. --- bolt/lib/Core/BinaryContext.cpp | 10 +++++- bolt/test/AArch64/check-symbol-area.s | 49 +++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 bolt/test/AArch64/check-symbol-area.s diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 72c72bbaf4a65..5f3b3c0e57152 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2374,8 +2374,16 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, return nullptr; BinaryFunction *BF = BFI->second; - if (EntryDesc) + if (EntryDesc) { + const uint64_t Address = BF->getAddress() + Symbol->getOffset(); + if (BF->isInConstantIsland(Address)) { + this->outs() << "BOLT-WARNING: symbol " << Symbol->getName() + << " is in data region of function 0x" + << Twine::utohexstr(Address) << ".\n"; + return nullptr; + } *EntryDesc = BF->getEntryIDForSymbol(Symbol); + } return BF; } diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s new file mode 100644 index 0000000000000..43ce25e16181f --- /dev/null +++ b/bolt/test/AArch64/check-symbol-area.s @@ -0,0 +1,49 @@ +// This test checks that when looking for a function +// correspnding to a symbol, BOLT is not looking +// through a data area (constant island). + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s + +// Before adding a check for constant islands, BOLT would exit with an error +// of the form: "symbol not found" and throw an LLVM UNREACHABLE error. +# CHECK-NOT: symbol not found +# CHECK-NOT: UNREACHABLE + +// Now BOLT throws a warning and does not crash. +# CHECK: BOLT-WARNING: symbol [[SYM:.*]] is in data region of function 0x{{.*}}. + +.text +.global main +main: + stp x14, x15, [sp, -8]! + mov x14, sp + adrp x1, .test + add x0, x1, :lo12:.test + bl first_block + ret + +.global first_block +$d: +first_block: + stp x14, x15, [sp, -8]! + mov x14, sp + bl second_block + ret +second_block: + stp x14, x15, [sp, -8]! + mov x14, sp + bl third_block + ret +$x: +third_block: + stp x14, x15, [sp, -8]! + mov x14, sp + adrp x1, .data + add x0, x1, :lo12:.test + ret + +.data +.test: + .string "test" From 797b5b2cc3312f9a5519fd2fd5b777363cb9edbe Mon Sep 17 00:00:00 2001 From: Ash Dobrescu Date: Tue, 23 Sep 2025 10:41:09 +0000 Subject: [PATCH 2/4] Fix warning messgage and test comment --- bolt/lib/Core/BinaryContext.cpp | 2 +- bolt/test/AArch64/check-symbol-area.s | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 5f3b3c0e57152..43df8b7967d78 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2379,7 +2379,7 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, if (BF->isInConstantIsland(Address)) { this->outs() << "BOLT-WARNING: symbol " << Symbol->getName() << " is in data region of function 0x" - << Twine::utohexstr(Address) << ".\n"; + << Twine::utohexstr(BF->getAddress()) << ".\n"; return nullptr; } *EntryDesc = BF->getEntryIDForSymbol(Symbol); diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s index 43ce25e16181f..0125dc2edafca 100644 --- a/bolt/test/AArch64/check-symbol-area.s +++ b/bolt/test/AArch64/check-symbol-area.s @@ -1,5 +1,5 @@ // This test checks that when looking for a function -// correspnding to a symbol, BOLT is not looking +// corresponding to a symbol, BOLT is not looking // through a data area (constant island). # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o From 550efd1be70a7eb9ddc7f611b6f83a8cfabee529 Mon Sep 17 00:00:00 2001 From: Ash Dobrescu Date: Tue, 7 Oct 2025 15:42:26 +0000 Subject: [PATCH 3/4] Make getEntryIDForSymbol() return optional and address review comments --- bolt/include/bolt/Core/BinaryFunction.h | 3 ++- bolt/lib/Core/BinaryContext.cpp | 7 +++---- bolt/lib/Core/BinaryFunction.cpp | 6 +++--- bolt/test/AArch64/check-symbol-area.s | 22 +++++----------------- 4 files changed, 13 insertions(+), 25 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 51b139a15e1a0..aea418034709f 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -650,7 +650,8 @@ class BinaryFunction { /// /// Prefer to use BinaryContext::getFunctionForSymbol(EntrySymbol, &ID) /// instead of calling this function directly. - uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const; + std::optional + getEntryIDForSymbol(const MCSymbol *EntrySymbol) const; /// If the function represents a secondary split function fragment, set its /// parent fragment to \p BF. diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 43df8b7967d78..9630f7d4a8872 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2378,11 +2378,10 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, const uint64_t Address = BF->getAddress() + Symbol->getOffset(); if (BF->isInConstantIsland(Address)) { this->outs() << "BOLT-WARNING: symbol " << Symbol->getName() - << " is in data region of function 0x" - << Twine::utohexstr(BF->getAddress()) << ".\n"; - return nullptr; + << " is in data region of function " << BF->getOneName() + << "(0x" << Twine::utohexstr(BF->getAddress()) << ").\n"; } - *EntryDesc = BF->getEntryIDForSymbol(Symbol); + *EntryDesc = BF->getEntryIDForSymbol(Symbol).value_or(0); } return BF; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 578a87dc6c09d..07d0d23dfc9f0 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3855,7 +3855,8 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) { return nullptr; } -uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { +std::optional +BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { if (!isMultiEntry()) return 0; @@ -3882,8 +3883,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { return NumEntries; ++NumEntries; } - - llvm_unreachable("symbol not found"); + return std::nullopt; } bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const { diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s index 0125dc2edafca..ceb5bc562a744 100644 --- a/bolt/test/AArch64/check-symbol-area.s +++ b/bolt/test/AArch64/check-symbol-area.s @@ -12,38 +12,26 @@ # CHECK-NOT: UNREACHABLE // Now BOLT throws a warning and does not crash. -# CHECK: BOLT-WARNING: symbol [[SYM:.*]] is in data region of function 0x{{.*}}. +# CHECK: BOLT-WARNING: symbol third_block/1 is in data region of function first_block(0x{{[0-9a-f]+}}). .text .global main main: - stp x14, x15, [sp, -8]! - mov x14, sp - adrp x1, .test - add x0, x1, :lo12:.test + add x0, x1, x1 bl first_block ret .global first_block $d: first_block: - stp x14, x15, [sp, -8]! - mov x14, sp + add x0, x1, x1 bl second_block ret second_block: - stp x14, x15, [sp, -8]! - mov x14, sp + add x0, x1, x1 bl third_block ret $x: third_block: - stp x14, x15, [sp, -8]! - mov x14, sp - adrp x1, .data - add x0, x1, :lo12:.test + add x0, x1, x1 ret - -.data -.test: - .string "test" From 3e9e9c7b4c402a266991037074dc197febf58dd3 Mon Sep 17 00:00:00 2001 From: Ash Dobrescu Date: Tue, 7 Oct 2025 15:50:39 +0000 Subject: [PATCH 4/4] Expand test comment to 80 char --- bolt/test/AArch64/check-symbol-area.s | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s index ceb5bc562a744..abab547dd7558 100644 --- a/bolt/test/AArch64/check-symbol-area.s +++ b/bolt/test/AArch64/check-symbol-area.s @@ -1,6 +1,5 @@ -// This test checks that when looking for a function -// corresponding to a symbol, BOLT is not looking -// through a data area (constant island). +// This test checks that when looking for a function corresponding to a +// symbol, BOLT is not looking through a data area (constant island). # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q