diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index 271bf90e7d8df9..dc8d72ae97625e 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -11,23 +11,26 @@ SYNOPSIS DESCRIPTION ----------- -:program:`llvm-symbolizer` reads object file names and addresses from the -command-line and prints corresponding source code locations to standard output. +:program:`llvm-symbolizer` reads input names and addresses from the command-line +and prints corresponding source code locations to standard output. If no address is specified on the command-line, it reads the addresses from -standard input. If no object file is specified on the command-line, but -addresses are, or if at any time an input value is not recognized, the input is -simply echoed to the output. +standard input. If no input name is specified on the command-line, but addresses +are, or if at any time an input value is not recognized, the input is simply +echoed to the output. + +Input names can be specified together with the addresses either on standard +input or as positional arguments on the command-line. By default, input names +are interpreted as object file paths. However, prefixing a name with +``BUILDID:`` states that it is a hex build ID rather than a path. This will look +up the corresponding debug binary. For consistency, prefixing a name with +``FILE:`` explicitly states that it is an object file path (the default). A positional argument or standard input value can be preceded by "DATA" or "CODE" to indicate that the address should be symbolized as data or executable code respectively. If neither is specified, "CODE" is assumed. DATA is symbolized as address and symbol size rather than line number. -Object files can be specified together with the addresses either on standard -input or as positional arguments on the command-line, following any "DATA" or -"CODE" prefix. 
- :program:`llvm-symbolizer` parses options from the environment variable ``LLVM_SYMBOLIZER_OPTS`` after parsing options from the command line. ``LLVM_SYMBOLIZER_OPTS`` is primarily useful for supplementing the command-line @@ -107,7 +110,7 @@ Example 3 - object specified with address: .. code-block:: console - $ llvm-symbolizer "test.elf 0x400490" "inlined.elf 0x400480" + $ llvm-symbolizer "test.elf 0x400490" "FILE:inlined.elf 0x400480" baz() /tmp/test.cpp:11:0 @@ -115,7 +118,7 @@ Example 3 - object specified with address: /tmp/test.cpp:8:10 $ cat addr2.txt - test.elf 0x4004a0 + FILE:test.elf 0x4004a0 inlined.elf 0x400480 $ llvm-symbolizer < addr2.txt @@ -125,11 +128,11 @@ Example 3 - object specified with address: foo() /tmp/test.cpp:8:10 -Example 4 - CODE and DATA prefixes: +Example 4 - BUILDID and FILE prefixes: .. code-block:: console - $ llvm-symbolizer --obj=test.elf "CODE 0x400490" "DATA 0x601028" + $ llvm-symbolizer "FILE:test.elf 0x400490" "DATA BUILDID:123456789abcdef 0x601028" baz() /tmp/test.cpp:11:0 @@ -137,17 +140,39 @@ Example 4 - CODE and DATA prefixes: 6295592 4 $ cat addr3.txt + FILE:test.elf 0x400490 + DATA BUILDID:123456789abcdef 0x601028 + + $ llvm-symbolizer < addr3.txt + baz() + /tmp/test.cpp:11:0 + + bar + 6295592 4 + +Example 5 - CODE and DATA prefixes: + +.. code-block:: console + + $ llvm-symbolizer --obj=test.elf "CODE 0x400490" "DATA 0x601028" + baz() + /tmp/test.cpp:11:0 + + bar + 6295592 4 + + $ cat addr4.txt CODE test.elf 0x4004a0 DATA inlined.elf 0x601028 - $ llvm-symbolizer < addr3.txt + $ llvm-symbolizer < addr4.txt main /tmp/test.cpp:15:0 bar 6295592 4 -Example 5 - path-style options: +Example 6 - path-style options: This example uses the same source file as above, but the source file's full path is /tmp/foo/test.cpp and is compiled as follows. 
The first case diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod.test b/llvm/test/tools/llvm-symbolizer/debuginfod.test index 34310c48c5103a..a9b984aad7b05b 100644 --- a/llvm/test/tools/llvm-symbolizer/debuginfod.test +++ b/llvm/test/tools/llvm-symbolizer/debuginfod.test @@ -27,11 +27,52 @@ RUN: --obj=%t/addr.exe 0x40054d --debuginfod | \ RUN: FileCheck %s --check-prefix=FOUND FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0 -# This should also work if the build ID is provided. +# This should also work if the build ID is provided via flag. RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ RUN: --build-id=127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d | \ RUN: FileCheck %s --check-prefix=FOUND +# This should also work if the build ID is provided via a positional argument. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=FOUND + +# Passing BUILDID twice is a syntax error. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: "BUILDID:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=BUILDIDBUILDID +BUILDIDBUILDID: BUILDID:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d + +# CODE should work preceding build ID. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: "CODE BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=FOUND + # The symbolizer shouldn't call the debuginfod library by default with no URLs. RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer --print-address \ RUN: --obj=%t/addr.exe 0x40054d | FileCheck %s --check-prefix=NOTFOUND + +# The symbolizer shouldn't call the debuginfod library if explicitly disabled. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: --no-debuginfod \ +RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=NOTHINGFOUND +NOTHINGFOUND: ?? 
+NOTHINGFOUND-NEXT: ??:0:0 + +# BUILDID shouldn't be parsed if --obj is given, just like regular filenames. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: --obj=%t/addr.exe \ +RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=BUILDIDIGNORED +BUILDIDIGNORED: BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d + +# Providing both BUILDID and FILE is a syntax error. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: "BUILDID:FILE:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=BUILDIDFILE +BUILDIDFILE: BUILDID:FILE:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: "FILE:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \ +RUN: FileCheck %s --check-prefix=FILEBUILDID +FILEBUILDID: FILE:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d diff --git a/llvm/test/tools/llvm-symbolizer/file-prefix.test b/llvm/test/tools/llvm-symbolizer/file-prefix.test new file mode 100644 index 00000000000000..a309914118050f --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/file-prefix.test @@ -0,0 +1,9 @@ +# The FILE prefix acts as a no-op, but it provides consistency with BUILDID. +RUN: llvm-symbolizer "CODE FILE:%p/Inputs/addr.exe 0x40054d" | \ +RUN: FileCheck %s --check-prefix=FOUND +FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0 + +# Passing FILE twice is a syntax error. 
+RUN: llvm-symbolizer "CODE FILE:FILE:%p/Inputs/addr.exe 0x40054d" | \ +RUN: FileCheck %s --check-prefix=FILEFILE +FILEFILE: CODE FILE:FILE:{{.*}}/Inputs/addr.exe 0x40054d diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index d77ba0672eb7a1..c9792788ae6c08 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -106,9 +106,31 @@ enum class Command { Frame, }; -static bool parseCommand(StringRef BinaryName, ArrayRef BuildID, - bool IsAddr2Line, StringRef InputString, Command &Cmd, - std::string &ModuleName, uint64_t &ModuleOffset) { +static void enableDebuginfod(LLVMSymbolizer &Symbolizer) { + static bool IsEnabled = false; + if (IsEnabled) + return; + IsEnabled = true; + // Look up symbols using the debuginfod client. + Symbolizer.addDIFetcher(std::make_unique()); + // The HTTPClient must be initialized for use by the debuginfod client. + HTTPClient::initialize(); +} + +static SmallVector parseBuildID(StringRef Str) { + std::string Bytes; + if (!tryGetFromHex(Str, Bytes)) + return {}; + ArrayRef BuildID(reinterpret_cast(Bytes.data()), + Bytes.size()); + return SmallVector(BuildID.begin(), BuildID.end()); +} + +static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, + StringRef InputString, Command &Cmd, + std::string &ModuleName, + SmallVectorImpl &BuildID, + uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; if (InputString.consume_front("CODE ")) { @@ -121,9 +143,31 @@ static bool parseCommand(StringRef BinaryName, ArrayRef BuildID, // If no cmd, assume it's CODE. Cmd = Command::Code; } - const char *Pos = InputString.data(); + + const char *Pos; // Skip delimiters and parse input filename (if needed). 
if (BinaryName.empty() && BuildID.empty()) { + bool HasFilePrefix = false; + bool HasBuildIDPrefix = false; + while (true) { + if (InputString.consume_front("FILE:")) { + if (HasFilePrefix) + return false; + HasFilePrefix = true; + continue; + } + if (InputString.consume_front("BUILDID:")) { + if (HasBuildIDPrefix) + return false; + HasBuildIDPrefix = true; + continue; + } + break; + } + if (HasFilePrefix && HasBuildIDPrefix) + return false; + + Pos = InputString.data(); Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; @@ -138,7 +182,14 @@ static bool parseCommand(StringRef BinaryName, ArrayRef BuildID, ModuleName = std::string(Pos, NameLength); Pos += NameLength; } + if (HasBuildIDPrefix) { + BuildID = parseBuildID(ModuleName); + if (BuildID.empty()) + return false; + ModuleName.clear(); + } } else { + Pos = InputString.data(); ModuleName = BinaryName.str(); } // Skip delimiters and parse module offset. @@ -195,21 +246,24 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, } static void symbolizeInput(const opt::InputArgList &Args, - ArrayRef BuildID, uint64_t AdjustVMA, - bool IsAddr2Line, OutputStyle Style, - StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { + ArrayRef IncomingBuildID, + uint64_t AdjustVMA, bool IsAddr2Line, + OutputStyle Style, StringRef InputString, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { Command Cmd; std::string ModuleName; + SmallVector BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); uint64_t Offset = 0; - if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), BuildID, IsAddr2Line, - StringRef(InputString), Cmd, ModuleName, Offset)) { + if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, + StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { Printer.printInvalidCommand({ModuleName, None}, InputString); return; } bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); if (!BuildID.empty()) { 
assert(ModuleName.empty()); + if (!Args.hasArg(OPT_no_debuginfod)) + enableDebuginfod(Symbolizer); std::string BuildIDStr = toHex(BuildID); executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, Style, Symbolizer, Printer); @@ -283,43 +337,23 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; } -SmallVector parseBuildIDArg(const opt::InputArgList &Args, int ID) { - if (const opt::Arg *A = Args.getLastArg(ID)) { - StringRef V(A->getValue()); - std::string Bytes; - if (!tryGetFromHex(V, Bytes)) { - errs() << A->getSpelling() + ": expected a build ID, but got '" + V + - "'\n"; - exit(1); - } - ArrayRef BuildID(reinterpret_cast(Bytes.data()), - Bytes.size()); - return SmallVector(BuildID.begin(), BuildID.end()); +static SmallVector parseBuildIDArg(const opt::InputArgList &Args, + int ID) { + const opt::Arg *A = Args.getLastArg(ID); + if (!A) + return {}; + + StringRef V(A->getValue()); + SmallVector BuildID = parseBuildID(V); + if (BuildID.empty()) { + errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n"; + exit(1); } - return {}; + return BuildID; } ExitOnError ExitOnErr; -static bool shouldUseDebuginfodByDefault(ArrayRef BuildID) { - // If the user explicitly specified a build ID, the usual way to find it is - // debuginfod. - if (!BuildID.empty()) - return true; - - // A debuginfod lookup could succeed if a HTTP client is available and at - // least one backing URL is configured. - if (HTTPClient::isAvailable() && - !ExitOnErr(getDefaultDebuginfodUrls()).empty()) - return true; - - // A debuginfod lookup could also succeed if something were present in the - // cache directory, but it would be surprising to enable debuginfod on this - // basis alone. To use existing caches in an "offline" fashion, the debuginfod - // flag must be set. 
- return false; -} - int main(int argc, char **argv) { InitLLVM X(argc, argv); sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); @@ -397,13 +431,14 @@ int main(int argc, char **argv) { LLVMSymbolizer Symbolizer(Opts); + // A debuginfod lookup could succeed if a HTTP client is available and at + // least one backing URL is configured. + bool ShouldUseDebuginfodByDefault = + HTTPClient::isAvailable() && + !ExitOnErr(getDefaultDebuginfodUrls()).empty(); if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, - shouldUseDebuginfodByDefault(BuildID))) { - // Look up symbols using the debuginfod client. - Symbolizer.addDIFetcher(std::make_unique()); - // The HTTPClient must be initialized for use by the debuginfod client. - HTTPClient::initialize(); - } + ShouldUseDebuginfodByDefault)) + enableDebuginfod(Symbolizer); std::unique_ptr Printer; if (Style == OutputStyle::GNU)