4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-symbolizer/output-style-json-data.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

## Handle invalid argument.
# RUN: llvm-symbolizer "DATA tmp.o Z" --output-style=JSON | \
# RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
# INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
# RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
# INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]

# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

;; Handle invalid argument.
; RUN: llvm-symbolizer "FRAME tmp.o Z" --output-style=JSON | \
; RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
; INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
; RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
; INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]

; RUN: llc -filetype=obj -o %t.o %s

Expand Down
65 changes: 65 additions & 0 deletions llvm/test/tools/llvm-symbolizer/symbol-search.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# This test checks the case when an address is specified by a symbol name rather
# than a number.
#
# It uses ELF shared object `Inputs/symbols.so` built for x86_64 using
# the instructions from `Inputs/symbols.h`.

# Show that the "CODE" command supports search by symbol name.
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
CODE-CMD: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12

# Check if a symbol name can be specified on the command-line.
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
RUN: llvm-symbolizer -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
SYMB: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12

# Check that if the input string contains a space after the symbol name, everything from the space onwards is ignored.
RUN: llvm-addr2line -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s
RUN: llvm-symbolizer -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s

# Show that a symbol name may be resolved to more than one location.
RUN: llvm-addr2line -e %p/Inputs/symbols.so static_func | FileCheck --check-prefix=SYMB-MULTI %s
SYMB-MULTI: /tmp/dbginfo{{[/\]+}}symbols.part3.c:4
SYMB-MULTI-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part4.c:4

# Show that if a symbol is not found, a special mark is printed.
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
NONEXISTENT: ??

# Show that more than one symbol may be specified.
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10

# Show that C++ mangled names may be specified.
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:7
MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:5

# Show that the containing function name can be printed in mangled form.
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so --no-demangle _Z7func_04i | FileCheck --check-prefix=MANGLED %s
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f _Z7func_04i | FileCheck --check-prefix=MANGLED %s
MANGLED: _Z7func_04i
MANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22

# Show that the containing function name can be printed in demangled form.
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f --demangle _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
NOTMANGLED: func_04(int)
NOTMANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22

# Show that both the symbol and input file can be specified in the search string on the command-line.
RUN: llvm-addr2line "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
RUN: llvm-symbolizer "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
SYMBIN: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12

# Show that a missing input file specified in the search string on the command-line is handled properly.
RUN: llvm-addr2line "%p/Inputs/666.so func_01" 2> %t.1.stderr | FileCheck --check-prefix=NONEXISTENT %s
RUN: FileCheck --input-file=%t.1.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
RUN: llvm-symbolizer "%p/Inputs/666.so func_01" 2> %t.2.stderr | FileCheck --check-prefix=NONEXISTENT %s
RUN: FileCheck --input-file=%t.2.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
BINARY-NOT-FOUND: error: '{{.*}}666.so': [[MSG]]
58 changes: 33 additions & 25 deletions llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ static Error makeStringError(StringRef Msg) {
static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
std::string &ModuleName, object::BuildID &BuildID,
uint64_t &ModuleOffset) {
StringRef &Symbol, uint64_t &ModuleOffset) {
ModuleName = BinaryName;
if (InputString.consume_front("CODE ")) {
Cmd = Command::Code;
Expand Down Expand Up @@ -224,42 +224,52 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
return makeStringError("no input filename has been specified");
}

// Parse module offset.
// Parse module offset, which can be specified as a number or as a symbol.
InputString = InputString.ltrim();
if (InputString.empty())
return makeStringError("no module offset has been specified");

// If input string contains a space, ignore everything after it. This behavior
// is consistent with GNU addr2line.
int OffsetLength = InputString.find_first_of(" \n\r");
StringRef Offset = InputString.substr(0, OffsetLength);

// GNU addr2line assumes the offset is hexadecimal and allows a redundant
// "0x" or "0X" prefix; do the same for compatibility.
if (IsAddr2Line)
Offset.consume_front("0x") || Offset.consume_front("0X");

// If the input is not a valid module offset, it is not an error, but its
// lookup does not make sense. Return error of different kind to distinguish
// from error or success.
if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset))
return errorCodeToError(errc::invalid_argument);
// If the input is not a number, treat it as a symbol.
if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) {
Symbol = Offset;
ModuleOffset = 0;
}

return Error::success();
}

template <typename T>
void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
OutputStyle Style, LLVMSymbolizer &Symbolizer,
DIPrinter &Printer) {
StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
bool ShouldInline, OutputStyle Style,
LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
uint64_t AdjustedOffset = Offset - AdjustVMA;
object::SectionedAddress Address = {AdjustedOffset,
object::SectionedAddress::UndefSection};
Request SymRequest = {ModuleName, Offset};
Request SymRequest = {
ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
Symbol};
if (Cmd == Command::Data) {
Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
print(SymRequest, ResOrErr, Printer);
} else if (Cmd == Command::Frame) {
Expected<std::vector<DILocal>> ResOrErr =
Symbolizer.symbolizeFrame(ModuleSpec, Address);
print(SymRequest, ResOrErr, Printer);
} else if (!Symbol.empty()) {
Expected<std::vector<DILineInfo>> ResOrErr =
Symbolizer.findSymbol(ModuleSpec, Symbol);
print(SymRequest, ResOrErr, Printer);
} else if (ShouldInline) {
Expected<DIInliningInfo> ResOrErr =
Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
Expand Down Expand Up @@ -288,7 +298,7 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
}

static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) {
Request SymRequest = {ModuleName, std::nullopt};
Request SymRequest = {ModuleName, std::nullopt, StringRef()};
Printer.print(SymRequest, DILineInfo());
}

Expand All @@ -301,16 +311,14 @@ static void symbolizeInput(const opt::InputArgList &Args,
std::string ModuleName;
object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
uint64_t Offset = 0;
StringRef Symbol;
if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
StringRef(InputString), Cmd, ModuleName, BuildID,
Offset)) {
handleAllErrors(
std::move(E),
[&](const StringError &EI) {
printError(EI, InputString);
printUnknownLineInfo(ModuleName, Printer);
},
[&](const ECError &EI) { printUnknownLineInfo(ModuleName, Printer); });
Symbol, Offset)) {
handleAllErrors(std::move(E), [&](const StringError &EI) {
printError(EI, InputString);
printUnknownLineInfo(ModuleName, Printer);
});
return;
}
bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
Expand All @@ -319,11 +327,11 @@ static void symbolizeInput(const opt::InputArgList &Args,
if (!Args.hasArg(OPT_no_debuginfod))
enableDebuginfod(Symbolizer, Args);
std::string BuildIDStr = toHex(BuildID);
executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
Style, Symbolizer, Printer);
executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
ShouldInline, Style, Symbolizer, Printer);
} else {
executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
Style, Symbolizer, Printer);
executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
ShouldInline, Style, Symbolizer, Printer);
}
}

Expand Down Expand Up @@ -527,7 +535,7 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
if (!Status) {
Request SymRequest = {Arg->getValue(), 0};
Request SymRequest = {Arg->getValue(), 0, StringRef()};
handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
Printer->printError(SymRequest, EI);
});
Expand Down
4 changes: 4 additions & 0 deletions llvm/unittests/ProfileData/MemProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ using ::llvm::DIInliningInfo;
using ::llvm::DILineInfo;
using ::llvm::DILineInfoSpecifier;
using ::llvm::DILocal;
using ::llvm::StringRef;
using ::llvm::memprof::CallStackMap;
using ::llvm::memprof::Frame;
using ::llvm::memprof::FrameId;
Expand Down Expand Up @@ -53,6 +54,9 @@ class MockSymbolizer : public SymbolizableModule {
virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const {
llvm_unreachable("unused");
}
virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol) const {
llvm_unreachable("unused");
}
virtual bool isWin32Module() const { llvm_unreachable("unused"); }
virtual uint64_t getModulePreferredBase() const {
llvm_unreachable("unused");
Expand Down