Skip to content

Commit

Permalink
[symbolizer] Support symbol lookup
Browse files Browse the repository at this point in the history
Recent versions of GNU binutils starting from 2.39 support symbol+offset
lookup in addition to the usual numeric address lookup. This change adds
symbol lookup to llvm-symbolizer and llvm-addr2line.

Now llvm-symbolizer behaves more like GNU addr2line: if the value specified
as an address on the command line or in the input stream is not a number, it
is treated as a symbol name. For example:

    llvm-symbolizer --obj=abc.so func_22
    llvm-symbolizer --obj=abc.so "CODE func_22"

This lookup is currently supported only for functions. Specifying a symbol
together with an offset is not supported yet.

Differential Revision: https://reviews.llvm.org/D149759
  • Loading branch information
spavloff committed Oct 2, 2023
1 parent 1a3a1d9 commit 2b27948
Show file tree
Hide file tree
Showing 23 changed files with 329 additions and 41 deletions.
13 changes: 12 additions & 1 deletion llvm/docs/CommandGuide/llvm-symbolizer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ DESCRIPTION
:program:`llvm-symbolizer` reads input names and addresses from the command-line
and prints corresponding source code locations to standard output. It can also
symbolize logs containing :doc:`Symbolizer Markup </SymbolizerMarkupFormat>` via
:option:`--filter-markup`.
:option:`--filter-markup`. Addresses may be specified as numbers or symbol names.

If no address is specified on the command-line, it reads the addresses from
standard input. If no input name is specified on the command-line, but addresses
Expand Down Expand Up @@ -196,6 +196,17 @@ shows --relativenames.
main
foo/test.cpp:15:0
Example 7 - Addresses as symbol names:

.. code-block:: console
$ llvm-symbolizer --obj=test.elf main
main
/tmp/test.cpp:14:0
$ llvm-symbolizer --obj=test.elf "CODE foz"
foz
/tmp/test.h:1:0
OPTIONS
-------

Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ Changes to the LLVM tools
* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
debugging information to print symbols' filenames and line numbers.

* ``llvm-symbolizer`` and ``llvm-addr2line`` now support addresses specified as symbol names.

Changes to LLDB
---------------------------------

Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class SourceCode;
// Describes a single lookup request: the module to search and what to look up
// in it.
struct Request {
// Name of the module the request refers to.
StringRef ModuleName;
// Numeric address of the request; has no value when none was given.
std::optional<uint64_t> Address;
// Symbol name of the request; empty when none was given.
StringRef Symbol;
};

class DIPrinter {
Expand All @@ -46,6 +47,8 @@ class DIPrinter {
virtual void print(const Request &Request, const DIGlobal &Global) = 0;
virtual void print(const Request &Request,
const std::vector<DILocal> &Locals) = 0;
// Print a list of source locations found for a request (for example the
// results of a lookup by symbol name). Locations may be empty.
virtual void print(const Request &Request,
const std::vector<DILineInfo> &Locations) = 0;

virtual bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) = 0;
Expand Down Expand Up @@ -91,6 +94,8 @@ class PlainPrinterBase : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
void print(const Request &Request,
const std::vector<DILineInfo> &Locations) override;

bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
Expand Down Expand Up @@ -141,6 +146,8 @@ class JSONPrinter : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
void print(const Request &Request,
const std::vector<DILineInfo> &Locations) override;

bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class SymbolizableModule {
virtual std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;

// Return the addresses of the symbols in this module whose name is Symbol.
// Several symbols may share a name, so more than one address can be returned;
// the result is empty if nothing matches.
virtual std::vector<object::SectionedAddress>
findSymbol(StringRef Symbol) const = 0;

// Return true if this is a 32-bit x86 PE COFF module.
virtual bool isWin32Module() const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
std::vector<object::SectionedAddress>
findSymbol(StringRef Symbol) const override;

// Return true if this is a 32-bit x86 PE COFF module.
bool isWin32Module() const override;
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);

// Look up Symbol by name in a module and return the source location of each
// match. Matches for which no file name could be determined are left out, so
// the result may be empty. The overloads differ only in how the module is
// specified: by object file, by module name, or by build ID.
Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj,
StringRef Symbol);
Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName,
StringRef Symbol);
Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID,
StringRef Symbol);

void flush();

// Evict entries from the binary cache until it is under the maximum size
Expand Down Expand Up @@ -146,6 +154,9 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T &ModuleSpecifier,
object::SectionedAddress ModuleOffset);
template <typename T>
Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier,
StringRef Symbol);

Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);

Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ void PlainPrinterBase::print(const Request &Request,
printFooter();
}

// Print every location found for a request. With nothing to report, defer to
// the single-location overload using a default-constructed DILineInfo;
// otherwise print each location in turn and finish with one footer.
void PlainPrinterBase::print(const Request &Request,
                             const std::vector<DILineInfo> &Locations) {
  if (Locations.empty()) {
    print(Request, DILineInfo());
    return;
  }

  for (const DILineInfo &Loc : Locations)
    print(Loc, false);
  printFooter();
}

bool PlainPrinterBase::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
ErrHandler(ErrorInfo, Request.ModuleName);
Expand All @@ -273,6 +284,8 @@ static std::string toHex(uint64_t V) {

static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
json::Object Json({{"ModuleName", Request.ModuleName.str()}});
if (!Request.Symbol.empty())
Json["SymName"] = Request.Symbol.str();
if (Request.Address)
Json["Address"] = toHex(*Request.Address);
if (!ErrorMsg.empty())
Expand Down Expand Up @@ -362,6 +375,19 @@ void JSONPrinter::print(const Request &Request,
printJSON(std::move(Json));
}

void JSONPrinter::print(const Request &Request,
const std::vector<DILineInfo> &Locations) {
json::Array Definitions;
for (const DILineInfo &L : Locations)
Definitions.push_back(toJSON(L));
json::Object Json = toJSON(Request);
Json["Loc"] = std::move(Definitions);
if (ObjectList)
ObjectList->push_back(std::move(Json));
else
printJSON(std::move(Json));
}

bool JSONPrinter::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
json::Object Json = toJSON(Request, ErrorInfo.message());
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,19 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
return DebugInfoContext->getLocalsForAddress(ModuleOffset);
}

// Collect the address of every entry in Symbols whose name is exactly Symbol,
// paired with the section index looked up for that address. The result is
// empty when no entry matches.
std::vector<object::SectionedAddress>
SymbolizableObjectFile::findSymbol(StringRef Symbol) const {
  std::vector<object::SectionedAddress> Matches;
  for (const SymbolDesc &Desc : Symbols) {
    if (Desc.Name != Symbol)
      continue;
    Matches.push_back(object::SectionedAddress{
        Desc.Addr, getModuleSectionIndexForAddress(Desc.Addr)});
  }
  return Matches;
}

/// Search for the first occurrence of the specified Address in ObjectFile.
uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
uint64_t Address) const {
Expand Down
44 changes: 44 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,50 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
return symbolizeFrameCommon(BuildID, ModuleOffset);
}

// Shared implementation of the findSymbol overloads. Resolves the module
// described by ModuleSpecifier, asks it for every address named Symbol and
// symbolizes each of them. Addresses whose file name comes back as
// DILineInfo::BadString are dropped; function names are demangled when
// Opts.Demangle is set. Fails only if the module lookup itself fails.
template <typename T>
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
  auto ModuleOrErr = getOrCreateModuleInfo(ModuleSpecifier);
  if (!ModuleOrErr)
    return ModuleOrErr.takeError();

  std::vector<DILineInfo> Locations;

  // A null module means an error has already been reported, so hand back the
  // still-empty result.
  SymbolizableModule *ModInfo = *ModuleOrErr;
  if (!ModInfo)
    return Locations;

  // The line-info specifier is the same for every address; build it once.
  const DILineInfoSpecifier Specifier(Opts.PathStyle, Opts.PrintFunctions);
  for (object::SectionedAddress Addr : ModInfo->findSymbol(Symbol)) {
    DILineInfo Loc =
        ModInfo->symbolizeCode(Addr, Specifier, Opts.UseSymbolTable);
    if (Loc.FileName == DILineInfo::BadString)
      continue;
    if (Opts.Demangle)
      Loc.FunctionName = DemangleName(Loc.FunctionName, ModInfo);
    Locations.push_back(Loc);
  }

  return Locations;
}

// Symbol lookup in a module given as an object file; forwards to
// findSymbolCommon.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) {
  return findSymbolCommon<ObjectFile>(Obj, Symbol);
}

// Symbol lookup in a module given by name. The name is converted to a
// std::string before it is forwarded to findSymbolCommon.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) {
  const std::string Name = ModuleName.str();
  return findSymbolCommon(Name, Symbol);
}

// Symbol lookup in a module identified by build ID; forwards to
// findSymbolCommon.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) {
  return findSymbolCommon<ArrayRef<uint8_t>>(BuildID, Symbol);
}

void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
LRUBinaries.clear();
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
some text
something not a valid address
0x40054d
some text2
some text possibly a symbol
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ some text
0x4005b9
0x4005ce
0x4005d4
some more text
another text
19 changes: 19 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// This file is a part of sources used to build `symbols.so`, which is used to
// test symbol location search made by llvm-symbolizer.
//
// Build instructions:
// $ mkdir /tmp/dbginfo
// $ cp symbols.h symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c /tmp/dbginfo/
// $ cd /tmp/dbginfo
// $ gcc -osymbols.so -shared -fPIC -g symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c


extern "C" {
extern int global_01;
int func_01();
int func_02(int);
}

template<typename T> T func_03(T x) {
return x + T(1);
}
25 changes: 25 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "symbols.h"

int global_01 = 22;

int static static_var = 0;

static int static_func_01(int x) {
static_var = x;
return global_01;
}

int func_01() {
int res = 1;
return res + static_func_01(22);
}

int func_04() {
static_var = 0;
return 22;
}

int func_04(int x) {
int res = static_var;
return res + func_03(x);
}
18 changes: 18 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "symbols.h"

int static static_var = 4;

static int static_func_01(int x) {
static_var--;
return x;
}

int func_02(int x) {
static_var = x;
return static_func_01(x);
}

int func_05(int x) {
int res = static_var;
return res + func_03(x);
}
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
static int static_func(int);
static int static_var = 0;

int static_func(int x) {
static_var++;
return static_var + x;
}

int func_06(int x) {
return static_func(x);
}

13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
static int static_func(int);
static int static_var = 5;

int static_func(int x) {
static_var++;
return static_var + x;
}

int func_07(int x) {
static_var++;
return static_func(x);
}

Binary file not shown.
21 changes: 13 additions & 8 deletions llvm/test/tools/llvm-symbolizer/output-style-json-code.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,44 @@
# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=NO-INLINES --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
# NO-INLINES:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# NO-INLINES:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# NO-INLINES-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2}]}
## Invalid argument after a valid one.
# NO-INLINES-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# NO-INLINES-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}

## This test case is testing stdin input, inlines by default.
# RUN: llvm-symbolizer --output-style=JSON -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=INLINE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
# INLINE:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# INLINE:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# INLINE-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
# INLINE-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# INLINE-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}

## Also check the last test case with llvm-addr2line.
## The expected result is the same with -f -i.
# RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
# INLINE-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
# INLINE-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# INLINE-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}

## Note llvm-addr2line without -f does not print the function name in JSON too.
# RUN: llvm-addr2line --output-style=JSON -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=NO-FUNC-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
# NO-FUNC-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# NO-FUNC-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# NO-FUNC-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
# NO-FUNC-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
# NO-FUNC-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}

## When a module offset is specified by a symbol, more than one source location can be found.
# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/symbols.so "static_func" | \
# RUN: FileCheck %s --check-prefix=MULTIPLE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
# MULTIPLE:[{"Loc":[{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","FunctionName":"static_func","Line":4,"StartAddress":"0x121d","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","StartLine":4},{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","FunctionName":"static_func","Line":4,"StartAddress":"0x125f","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","StartLine":4}],"ModuleName":"{{.*}}Inputs/symbols.so","SymName":"static_func"}]

0 comments on commit 2b27948

Please sign in to comment.