From 6280d2382724b449c1a3fd63d19c4e934caa4d34 Mon Sep 17 00:00:00 2001 From: Midhunesh Date: Thu, 13 Nov 2025 12:04:34 +0530 Subject: [PATCH 1/3] section relative syntax implementation --- .../llvm-symbolizer/xcoff-section-relative.ll | 51 ++++++ .../llvm-symbolizer/xcoff-section-syntax.test | 31 ++++ .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 158 +++++++++++++++++- 3 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll new file mode 100644 index 0000000000000..cfc6b31812a98 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll @@ -0,0 +1,51 @@ +;; Test section-relative address syntax for XCOFF +;; The syntax (SECTION_TYPE)(+offset) represents: offset from section base + +; REQUIRES: system-aix +; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff -function-sections < %s + +;; Test 1: Symbolize .foo using section-relative offset +; RUN: llvm-nm --numeric-sort %t | grep " T \.foo$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.foo_query +; RUN: llvm-symbolizer --obj=%t @%t.foo_query | FileCheck %s --check-prefix=TEST-FOO + +;; Test 2: Symbolize .bar using section-relative offset +; RUN: llvm-nm --numeric-sort %t | grep " T \.bar$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.bar_query +; RUN: llvm-symbolizer --obj=%t @%t.bar_query | FileCheck %s --check-prefix=TEST-BAR + +;; Test 3: Symbolize global_var using section-relative offset in DATA section +; RUN: llvm-readobj --sections %t | awk '/Name: \.data/{found=1} found && /VirtualAddress:/{print $2; exit}' > %t.data_base +; RUN: llvm-nm --numeric-sort %t | grep " D global_var$" | awk '{print $1}' > %t.global_var_vma +; RUN: sh -c 'printf "%%d\n" $(cat %t.data_base)' > %t.data_base_dec +; RUN: sh -c 'printf "%%d\n" 0x$(cat %t.global_var_vma)' > %t.global_var_dec +; RUN: awk 'NR==FNR{base=$1; next} {vma=$1; printf "DATA (DATA)(+0x%%x)", vma-base}' %t.data_base_dec %t.global_var_dec > %t.data_query +; RUN: llvm-symbolizer --obj=%t @%t.data_query | FileCheck %s --check-prefix=TEST-DATA + +;; Test 4: Verify section structure with llvm-readobj +; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=SECTIONS + +define void @foo() { +entry: + ret void +} + +define void @bar() { +entry: + ret void +} + +@global_var = global i32 42, align 4 + +;; Verify correct symbolization with section-relative syntax +; TEST-FOO: .foo +; TEST-FOO-NEXT: ??:0:0 + +; TEST-BAR: .bar +; TEST-BAR-NEXT: ??:0:0 + +; TEST-DATA: global_var + +;; Verify XCOFF sections exist with correct types +; SECTIONS: Name: .text +; SECTIONS: Type: STYP_TEXT +; SECTIONS: Name: .data +; SECTIONS: Type: STYP_DATA \ No newline at end of file diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test new file mode 100644 index 0000000000000..01bda672387f4 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test @@ -0,0 +1,31 @@ +## Test section-relative address syntax parsing for XCOFF +## This tests that the (SECTION_TYPE)(+offset) syntax produces appropriate +## error messages for invalid syntax + +# REQUIRES: system-aix + +## Create a simple XCOFF object for testing +# RUN: echo "define void @test() { ret void }" | \ +# RUN: llc -filetype=obj -mtriple=powerpc-aix-ibm-xcoff -o %t.o + +## Test invalid section type +# RUN: llvm-symbolizer --obj=%t.o '(INVALID)(+0x10)' 2>&1 | \ +# RUN: FileCheck %s --check-prefix=INVALID-TYPE + +## Test missing '+' sign +# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(0x10)' 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NO-PLUS + +## Test invalid offset value (not a hex number) +# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(+abc)' 2>&1 | \ +# RUN: FileCheck %s --check-prefix=INVALID-OFFSET + +## Test empty section type +# RUN: llvm-symbolizer --obj=%t.o '()(+0x10)' 2>&1 | \ +# RUN: FileCheck %s --check-prefix=EMPTY-SECTION + +## Verify error messages are helpful +# INVALID-TYPE: unknown section type +# NO-PLUS: section-relative offset must start with '+' +# INVALID-OFFSET: invalid offset in section-relative address +# EMPTY-SECTION: unknown section type \ No newline at end of file diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 4784dafeb2948..d239d1aad73d7 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -17,12 +17,15 @@ #include "Opts.inc" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Markup.h" #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Debuginfod/BuildIDFetcher.h" #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Debuginfod/HTTPClient.h" @@ -157,11 +160,97 @@ static Error makeStringError(StringRef Msg) { return make_error(Msg, inconvertibleErrorCode()); } +// Helper function to get XCOFF section type flag from string + static std::optional parseXCOFFSectionType(StringRef TypeStr) { + return StringSwitch>(TypeStr) + .Case("PAD", XCOFF::STYP_PAD) + .Case("DWARF", XCOFF::STYP_DWARF) + .Case("TEXT", XCOFF::STYP_TEXT) + .Case("DATA", XCOFF::STYP_DATA) + .Case("BSS", XCOFF::STYP_BSS) + .Case("EXCEPT", XCOFF::STYP_EXCEPT) + .Case("INFO", XCOFF::STYP_INFO) + .Case("TDATA", XCOFF::STYP_TDATA) + .Case("TBSS", XCOFF::STYP_TBSS) + .Case("LOADER", XCOFF::STYP_LOADER) + .Case("DEBUG", XCOFF::STYP_DEBUG) + .Case("TYPCHK", XCOFF::STYP_TYPCHK) + .Case("OVRFLO", XCOFF::STYP_OVRFLO) + .Default(std::nullopt); + } + + // Find the base VMA of the first section matching the given type for XCOFF. + // The syntax (SECTION_TYPE)(+offset) represents an offset from the section base, + // so we return the section's base address to compute: VMA = base + offset. + static Expected getXCOFFSectionBaseAddress( + const object::XCOFFObjectFile *XCOFFObj, + XCOFF::SectionTypeFlags TypeFlag) { + + for (const auto &Section : XCOFFObj->sections()) { + DataRefImpl SecRef = Section.getRawDataRefImpl(); + int32_t Flags = XCOFFObj->getSectionFlags(SecRef); + + if ((Flags & 0xFFFF) == TypeFlag) { + return Section.getAddress(); + } + } + + return createStringError(inconvertibleErrorCode(), + "section type not found in XCOFF object"); + } + + static Expected validateSectionType(StringRef ModulePath, + StringRef SectionType, + uint64_t &Offset, + LLVMSymbolizer &Symbolizer) { + // Parse the section type string + auto SectionTypeFlag = parseXCOFFSectionType(SectionType); + if (!SectionTypeFlag) { + return createStringError(inconvertibleErrorCode(), + "unknown section type: " + SectionType.str()); + } + + // Get the module info to access the object file + auto ModuleOrErr = Symbolizer.getOrCreateModuleInfo(ModulePath); + if (!ModuleOrErr) { + return ModuleOrErr.takeError(); + } + + auto BinaryOrErr = object::createBinary(ModulePath); + if (!BinaryOrErr) { + return BinaryOrErr.takeError(); + } + + object::Binary *Binary = BinaryOrErr->getBinary(); + if (auto *XCOFFObj = dyn_cast(Binary)) { + // Get the base VMA of the section matching the type + auto SectionBaseOrErr = getXCOFFSectionBaseAddress(XCOFFObj, *SectionTypeFlag); + if (!SectionBaseOrErr) + return SectionBaseOrErr.takeError(); + + uint64_t SectionBase = *SectionBaseOrErr; + uint64_t SectionRelativeOffset = Offset; + + // Convert section-relative offset to absolute VMA + // VMA = section_base + offset + Offset = SectionBase + SectionRelativeOffset; + + // Return UndefSection - XCOFF symbolizer doesn't support SectionedAddress, + // so we use absolute VMA addressing instead. + return object::SectionedAddress::UndefSection; + } + + return createStringError(inconvertibleErrorCode(), + "section type syntax is only supported for XCOFF objects"); + } + static Error parseCommand(StringRef BinaryName, bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, object::BuildID &BuildID, - StringRef &Symbol, uint64_t &Offset) { + StringRef &Symbol, uint64_t &Offset, + StringRef &SectionType) { ModuleName = BinaryName; + SectionType = StringRef(); if (InputString.consume_front("CODE ")) { Cmd = Command::Code; } else if (InputString.consume_front("DATA ")) { @@ -245,10 +334,43 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line, AddrSpec.consume_front_insensitive("+0x"); } + // Check for section-relative address syntax: (SECTION_TYPE)(+0x0) + if (AddrSpec.starts_with("(")) { + size_t FirstClose = AddrSpec.find(')'); + if (FirstClose != StringRef::npos && FirstClose + 1 < AddrSpec.size() && + AddrSpec[FirstClose + 1] == '(') { + size_t SecondOpen = FirstClose + 1; + size_t SecondClose = AddrSpec.find(')', SecondOpen); + if (SecondClose != StringRef::npos) { + // Extract section type from first parentheses + SectionType = AddrSpec.substr(1, FirstClose - 1); + + // Validate that section type is not empty + if (SectionType.empty()) + return makeStringError("unknown section type: empty section type"); + + // Extract offset from second parentheses + StringRef OffsetPart = AddrSpec.substr(SecondOpen + 1, SecondClose - SecondOpen - 1); + + // The offset should start with '+' + if (!OffsetPart.consume_front("+")) + return makeStringError("section-relative offset must start with '+'"); + + // Parse the offset - auto-detect base (0x prefix = hex, otherwise decimal) + if (OffsetPart.getAsInteger(0, Offset)) + return makeStringError("invalid offset in section-relative address"); + + Symbol = StringRef(); + return Error::success(); + } + } + } + // If address specification is a number, treat it as a module offset. if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) { // Module offset is an address. Symbol = StringRef(); + SectionType = StringRef(); return Error::success(); } @@ -260,6 +382,7 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line, // Otherwise it is a symbol name, potentially with an offset. Symbol = AddrSpec; Offset = 0; + SectionType = StringRef(); // If the address specification contains '+', try treating it as // "symbol + offset". @@ -282,10 +405,11 @@ template void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, OutputStyle Style, - LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { - uint64_t AdjustedOffset = Offset - AdjustVMA; - object::SectionedAddress Address = {AdjustedOffset, - object::SectionedAddress::UndefSection}; + LLVMSymbolizer &Symbolizer, DIPrinter &Printer, + uint64_t SectionIndex) { + uint64_t AdjustedOffset = Offset - AdjustVMA; + object::SectionedAddress Address = {AdjustedOffset, SectionIndex}; + Request SymRequest = { ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt, Symbol}; @@ -342,6 +466,7 @@ static void symbolizeInput(const opt::InputArgList &Args, object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); uint64_t Offset = 0; StringRef Symbol; + StringRef SectionType; // An empty input string may be used to check if the process is alive and // responding to input. Do not emit a message on stderr in this case but @@ -352,24 +477,41 @@ static void symbolizeInput(const opt::InputArgList &Args, } if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, StringRef(InputString), Cmd, ModuleName, BuildID, - Symbol, Offset)) { + Symbol, Offset, SectionType)) { handleAllErrors(std::move(E), [&](const StringError &EI) { printError(EI, InputString); printUnknownLineInfo(ModuleName, Printer); }); return; } + + // Validate section index from section type if specified + uint64_t SectionIndex = object::SectionedAddress::UndefSection; + if (!SectionType.empty() && !ModuleName.empty()) { + auto SectionIndexOrErr = validateSectionType(ModuleName, SectionType, Offset, Symbolizer); + if (!SectionIndexOrErr) { + handleAllErrors(SectionIndexOrErr.takeError(), [&](const ErrorInfoBase &EI) { + printError(EI, InputString); + }); + printUnknownLineInfo(ModuleName, Printer); + return; + } + SectionIndex = *SectionIndexOrErr; + } + bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); if (!BuildID.empty()) { assert(ModuleName.empty()); if (!Args.hasArg(OPT_no_debuginfod)) enableDebuginfod(Symbolizer, Args); std::string BuildIDStr = toHex(BuildID); + // Note: Section type resolution is not supported for BuildID-based lookup executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA, - ShouldInline, Style, Symbolizer, Printer); + ShouldInline, Style, Symbolizer, Printer, + object::SectionedAddress::UndefSection); } else { executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA, - ShouldInline, Style, Symbolizer, Printer); +ShouldInline, Style, Symbolizer, Printer, SectionIndex); } } From 07ab95bdabbf2c69f0fe18d7acb7103e262c19d5 Mon Sep 17 00:00:00 2001 From: Midhunesh Date: Sun, 9 Nov 2025 12:33:22 -0500 Subject: [PATCH 2/3] symbolizer to accept section relative syntax --- llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll | 2 +- llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test | 2 +- llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll index cfc6b31812a98..d1e21fe135e9e 100644 --- a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll +++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll @@ -48,4 +48,4 @@ entry: ; SECTIONS: Name: .text ; SECTIONS: Type: STYP_TEXT ; SECTIONS: Name: .data -; SECTIONS: Type: STYP_DATA \ No newline at end of file +; SECTIONS: Type: STYP_DATA diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test index 01bda672387f4..ca5ef9d3cb2cc 100644 --- a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test +++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test @@ -28,4 +28,4 @@ # INVALID-TYPE: unknown section type # NO-PLUS: section-relative offset must start with '+' # INVALID-OFFSET: invalid offset in section-relative address -# EMPTY-SECTION: unknown section type \ No newline at end of file +# EMPTY-SECTION: unknown section type diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index d239d1aad73d7..3bdbce55c4f68 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -29,6 +29,7 @@ #include "llvm/Debuginfod/BuildIDFetcher.h" #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Debuginfod/HTTPClient.h" +#include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" From a0b4db7d32a4a3ca40141e555ee6a2c428c47593 Mon Sep 17 00:00:00 2001 From: Midhunesh Date: Tue, 18 Nov 2025 07:24:11 -0500 Subject: [PATCH 3/3] code format fix --- llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 3bdbce55c4f68..1ad5d43409b4a 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -1,4 +1,5 @@ -//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===// +//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer +//------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.