diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index ce4230d048c7a..72ebbf8f59ed1 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -183,6 +183,11 @@ OPTIONS Print just the file's name without any directories, instead of the absolute path. +.. option:: --build-id + + Look up the object using the given build ID, specified as a hexadecimal + string. Mutually exclusive with :option:`--obj`. + .. _llvm-symbolizer-opt-C: .. option:: --demangle, -C @@ -232,7 +237,8 @@ OPTIONS .. option:: --obj , --exe, -e Path to object file to be symbolized. If ``-`` is specified, read the object - directly from the standard input stream. + directly from the standard input stream. Mutually exclusive with + :option:`--build-id`. .. _llvm-symbolizer-opt-output-style: diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 30e55c5d410ae..6bbac31a21a98 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -13,6 +13,7 @@ #ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H +#include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo/Symbolize/DIFetcher.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/Binary.h" @@ -62,22 +63,32 @@ class LLVMSymbolizer { object::SectionedAddress ModuleOffset); Expected symbolizeCode(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected symbolizeCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected symbolizeInlinedCode(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected symbolizeInlinedCode(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected + symbolizeInlinedCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected symbolizeData(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected symbolizeData(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected symbolizeData(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected> symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected> symbolizeFrame(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected> + symbolizeFrame(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); void flush(); static std::string @@ -117,6 +128,12 @@ class LLVMSymbolizer { getOrCreateModuleInfo(const std::string &ModuleName); Expected getOrCreateModuleInfo(const ObjectFile &Obj); + /// Returns a SymbolizableModule or an error if loading debug info failed. + /// Unlike the above, errors are reported each time, since they are more + /// likely to be transient. + Expected + getOrCreateModuleInfo(ArrayRef BuildID); + Expected createModuleInfo(const ObjectFile *Obj, std::unique_ptr Context, StringRef ModuleName); @@ -135,7 +152,8 @@ class LLVMSymbolizer { const std::string &DebuglinkName, uint32_t CRCHash, std::string &Result); - bool findDebugBinary(const ArrayRef BuildID, std::string &Result); + bool getOrFindDebugBinary(const ArrayRef BuildID, + std::string &Result); /// Returns pair of pointers to object and debug object. Expected getOrCreateObjectPair(const std::string &Path, @@ -149,6 +167,7 @@ class LLVMSymbolizer { std::map, std::less<>> Modules; + StringMap BuildIDPaths; /// Contains cached results of getOrCreateObjectPair(). std::map, ObjectPair> diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 08d51dbd56284..abb26ba8cc401 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -80,6 +80,12 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, return symbolizeCodeCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeCodeCommon(BuildID, ModuleOffset); +} + template Expected LLVMSymbolizer::symbolizeInlinedCodeCommon( const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { @@ -123,6 +129,12 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeInlinedCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); +} + template Expected LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, @@ -162,6 +174,12 @@ LLVMSymbolizer::symbolizeData(const std::string &ModuleName, return symbolizeDataCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeData(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeDataCommon(BuildID, ModuleOffset); +} + template Expected> LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, @@ -197,11 +215,18 @@ LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, return symbolizeFrameCommon(ModuleName, ModuleOffset); } +Expected> +LLVMSymbolizer::symbolizeFrame(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeFrameCommon(BuildID, ModuleOffset); +} + void LLVMSymbolizer::flush() { ObjectForUBPathAndArch.clear(); BinaryForPath.clear(); ObjectPairForPathArch.clear(); Modules.clear(); + BuildIDPaths.clear(); } namespace { @@ -367,7 +392,7 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, if (BuildID->size() < 2) return nullptr; std::string DebugBinaryPath; - if (!findDebugBinary(*BuildID, DebugBinaryPath)) + if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -421,12 +446,29 @@ bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, return false; } -bool LLVMSymbolizer::findDebugBinary(const ArrayRef BuildID, - std::string &Result) { +static StringRef getBuildIDStr(ArrayRef BuildID) { + return StringRef(reinterpret_cast(BuildID.data()), + BuildID.size()); +} + +bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef BuildID, + std::string &Result) { + StringRef BuildIDStr = getBuildIDStr(BuildID); + auto I = BuildIDPaths.find(BuildIDStr); + if (I != BuildIDPaths.end()) { + Result = I->second; + return true; + } + auto recordPath = [&](StringRef Path) { + Result = Path.str(); + auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); + assert(InsertResult.second); + }; + Optional Path; Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID); if (Path) { - Result = std::move(*Path); + recordPath(*Path); return true; } @@ -434,7 +476,7 @@ bool LLVMSymbolizer::findDebugBinary(const ArrayRef BuildID, for (const std::unique_ptr &Fetcher : DIFetchers) { Path = Fetcher->fetchBuildID(BuildID); if (Path) { - Result = std::move(*Path); + recordPath(*Path); return true; } } @@ -597,6 +639,17 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { return createModuleInfo(&Obj, std::move(Context), ObjName); } +Expected +LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef BuildID) { + std::string Path; + if (!getOrFindDebugBinary(BuildID, Path)) { + return createStringError(errc::no_such_file_or_directory, + Twine("could not find build ID '") + + toHex(BuildID) + "'"); + } + return getOrCreateModuleInfo(Path); +} + namespace { // Undo these various manglings for Win32 extern "C" functions: diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test b/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test new file mode 100644 index 0000000000000..2a6061e2f3595 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test @@ -0,0 +1,3 @@ +RUN: not llvm-symbolizer --build-id=not_a_hex_string 0x1234 2>&1 | FileCheck %s + +CHECK: --build-id=: expected a build ID, but got 'not_a_hex_string' diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test b/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test new file mode 100644 index 0000000000000..5b9927420af24 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test @@ -0,0 +1,3 @@ +RUN: not llvm-symbolizer --build-id=abc --obj=bad 0x1234 2>&1 | FileCheck %s + +CHECK: error: cannot specify both --build-id and --obj diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test b/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test new file mode 100644 index 0000000000000..847cfe7a8974f --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test @@ -0,0 +1,10 @@ +RUN: llvm-symbolizer --build-id=abad 0x1234 0x5678 > %t.stdout 2> %t.stderr +RUN: FileCheck %s --check-prefix=STDOUT < %t.stdout +RUN: FileCheck %s --check-prefix=STDERR < %t.stderr + +STDOUT: ?? +STDOUT: ??:0:0 +STDOUT: ?? +STDOUT: ??:0:0 + +STDERR-COUNT-2: LLVMSymbolizer: error reading file: could not find build ID 'ABAD' diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod.test b/llvm/test/tools/llvm-symbolizer/debuginfod.test index 93160f395d39e..fb9aea377096a 100644 --- a/llvm/test/tools/llvm-symbolizer/debuginfod.test +++ b/llvm/test/tools/llvm-symbolizer/debuginfod.test @@ -25,3 +25,8 @@ RUN: %t/llvmcache-9800707741016212219 RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ RUN: --obj=%t/addr.exe 0x40054d | FileCheck %s --check-prefix=FOUND FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0 + +# This should also work if the build ID is provided. +RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: --build-id=127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d | \ +RUN: FileCheck %s --check-prefix=FOUND diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index 6026e24d6ffa6..c07be62cac03c 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -21,6 +21,7 @@ defm adjust_vma : Eq<"adjust-vma", "Add specified offset to object file addresses">, MetaVarName<"">; def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">; +defm build_id : Eq<"build-id", "Build ID used to look up the object file">; defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"">; defm default_arch : Eq<"default-arch", "Default architecture (for multi-arch objects)">, diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 0315413ea0c3c..e036a075e97b2 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "Opts.inc" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" @@ -103,8 +104,8 @@ enum class Command { Frame, }; -static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, - StringRef InputString, Command &Cmd, +static bool parseCommand(StringRef BinaryName, ArrayRef BuildID, + bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; @@ -120,7 +121,7 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, } const char *Pos = InputString.data(); // Skip delimiters and parse input filename (if needed). - if (BinaryName.empty()) { + if (BinaryName.empty() && BuildID.empty()) { Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; @@ -149,31 +150,24 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); } -static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, - bool IsAddr2Line, OutputStyle Style, - StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { - Command Cmd; - std::string ModuleName; - uint64_t Offset = 0; - if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, - StringRef(InputString), Cmd, ModuleName, Offset)) { - Printer.printInvalidCommand({ModuleName, None}, InputString); - return; - } - +template +void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, + uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, + OutputStyle Style, LLVMSymbolizer &Symbolizer, + DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; + object::SectionedAddress Address = {AdjustedOffset, + object::SectionedAddress::UndefSection}; if (Cmd == Command::Data) { - Expected ResOrErr = Symbolizer.symbolizeData( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Cmd == Command::Frame) { - Expected> ResOrErr = Symbolizer.symbolizeFrame( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected> ResOrErr = + Symbolizer.symbolizeFrame(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); - } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) { - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + } else if (ShouldInline) { + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Style == OutputStyle::GNU) { // With PrintFunctions == FunctionNameKind::LinkageName (default) @@ -182,8 +176,8 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, // caller function in the inlining chain. This contradicts the existing // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only // the topmost function, which suits our needs better. - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); Expected Res0OrErr = !ResOrErr ? Expected(ResOrErr.takeError()) @@ -191,12 +185,37 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, : ResOrErr->getFrame(0)); print({ModuleName, Offset}, Res0OrErr, Printer); } else { - Expected ResOrErr = Symbolizer.symbolizeCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } } +static void symbolizeInput(const opt::InputArgList &Args, + ArrayRef BuildID, uint64_t AdjustVMA, + bool IsAddr2Line, OutputStyle Style, + StringRef InputString, LLVMSymbolizer &Symbolizer, + DIPrinter &Printer) { + Command Cmd; + std::string ModuleName; + uint64_t Offset = 0; + if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), BuildID, IsAddr2Line, + StringRef(InputString), Cmd, ModuleName, Offset)) { + Printer.printInvalidCommand({ModuleName, None}, InputString); + return; + } + bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); + if (!BuildID.empty()) { + assert(ModuleName.empty()); + std::string BuildIDStr = toHex(BuildID); + executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } else { + executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } +} + static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, raw_ostream &OS) { const char HelpText[] = " [options] addresses..."; @@ -261,6 +280,22 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; } +SmallVector parseBuildIDArg(const opt::InputArgList &Args, int ID) { + if (const opt::Arg *A = Args.getLastArg(ID)) { + StringRef V(A->getValue()); + std::string Bytes; + if (!tryGetFromHex(V, Bytes)) { + errs() << A->getSpelling() + ": expected a build ID, but got '" + V + + "'\n"; + exit(1); + } + ArrayRef BuildID(reinterpret_cast(Bytes.data()), + Bytes.size()); + return SmallVector(BuildID.begin(), BuildID.end()); + } + return {}; +} + int main(int argc, char **argv) { InitLLVM X(argc, argv); sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); @@ -328,6 +363,12 @@ int main(int argc, char **argv) { Style = OutputStyle::LLVM; } + if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { + errs() << "error: cannot specify both --build-id and --obj\n"; + return EXIT_FAILURE; + } + SmallVector BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); + LLVMSymbolizer Symbolizer(Opts); // Look up symbols using the debuginfod client. @@ -353,15 +394,15 @@ int main(int argc, char **argv) { std::string StrippedInputString(InputString); llvm::erase_if(StrippedInputString, [](char c) { return c == '\r' || c == '\n'; }); - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString, - Symbolizer, *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, + StrippedInputString, Symbolizer, *Printer); outs().flush(); } } else { Printer->listBegin(); for (StringRef Address : InputAddresses) - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer, - *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, + Symbolizer, *Printer); Printer->listEnd(); }