diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index fe5df077b4566..59c0ab6d196ac 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -14,7 +14,7 @@ DESCRIPTION :program:`llvm-symbolizer` reads input names and addresses from the command-line and prints corresponding source code locations to standard output. It can also symbolize logs containing :doc:`Symbolizer Markup ` via -:option:`--filter-markup`. +:option:`--filter-markup`. Addresses may be specified as numbers or symbol names. If no address is specified on the command-line, it reads the addresses from standard input. If no input name is specified on the command-line, but addresses @@ -196,6 +196,17 @@ shows --relativenames. main foo/test.cpp:15:0 +Example 7 - Addresses as symbol names: + +.. code-block:: console + + $ llvm-symbolizer --obj=test.elf main + main + /tmp/test.cpp:14:0 + $ llvm-symbolizer --obj=test.elf "CODE foz" + foz + /tmp/test.h:1:0 + OPTIONS ------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 660bb4e70a5a7..8317056ffaf5a 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -180,6 +180,8 @@ Changes to the LLVM tools * ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use debugging information to print symbols' filenames and line numbers. +* llvm-symbolizer and llvm-addr2line now support addresses specified as symbol names. 
+ Changes to LLDB --------------------------------- diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 026f917ced5bc..72ffdd29f1b72 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -34,6 +34,7 @@ class SourceCode; struct Request { StringRef ModuleName; std::optional<uint64_t> Address; + StringRef Symbol; }; class DIPrinter { @@ -46,6 +47,8 @@ class DIPrinter { virtual void print(const Request &Request, const DIGlobal &Global) = 0; virtual void print(const Request &Request, const std::vector<DILocal> &Locals) = 0; + virtual void print(const Request &Request, + const std::vector<DILineInfo> &Locations) = 0; virtual bool printError(const Request &Request, const ErrorInfoBase &ErrorInfo) = 0; @@ -91,6 +94,8 @@ class PlainPrinterBase : public DIPrinter { void print(const Request &Request, const DIGlobal &Global) override; void print(const Request &Request, const std::vector<DILocal> &Locals) override; + void print(const Request &Request, + const std::vector<DILineInfo> &Locations) override; bool printError(const Request &Request, const ErrorInfoBase &ErrorInfo) override; @@ -141,6 +146,8 @@ class JSONPrinter : public DIPrinter { void print(const Request &Request, const DIGlobal &Global) override; void print(const Request &Request, const std::vector<DILocal> &Locals) override; + void print(const Request &Request, + const std::vector<DILineInfo> &Locations) override; bool printError(const Request &Request, const ErrorInfoBase &ErrorInfo) override; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h index 51e92b83eadba..255932d35cda1 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -36,6 +36,9 @@ class SymbolizableModule { virtual std::vector<DILocal> symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0; + virtual std::vector<object::SectionedAddress> + 
findSymbol(StringRef Symbol) const = 0; + // Return true if this is a 32-bit x86 PE COFF module. virtual bool isWin32Module() const = 0; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h index 075dbe3e0e372..311fa201d900e 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -43,6 +43,8 @@ class SymbolizableObjectFile : public SymbolizableModule { DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override; std::vector<DILocal> symbolizeFrame(object::SectionedAddress ModuleOffset) const override; + std::vector<object::SectionedAddress> + findSymbol(StringRef Symbol) const override; // Return true if this is a 32-bit x86 PE COFF module. bool isWin32Module() const override; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 99a7f219baaa0..bc4aa74073a65 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -104,6 +104,14 @@ class LLVMSymbolizer { Expected<std::vector<DILocal>> symbolizeFrame(ArrayRef<uint8_t> BuildID, object::SectionedAddress ModuleOffset); + + Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj, + StringRef Symbol); + Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName, + StringRef Symbol); + Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID, + StringRef Symbol); + void flush(); // Evict entries from the binary cache until it is under the maximum size @@ -146,6 +154,9 @@ class LLVMSymbolizer { Expected<std::vector<DILocal>> symbolizeFrameCommon(const T &ModuleSpecifier, object::SectionedAddress ModuleOffset); + template <typename T> + Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier, + StringRef Symbol); Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj); diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index dcf5eee2bb32b..d7b33ce1d0f06 100644 --- 
a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -260,6 +260,17 @@ void PlainPrinterBase::print(const Request &Request, printFooter(); } +void PlainPrinterBase::print(const Request &Request, + const std::vector<DILineInfo> &Locations) { + if (Locations.empty()) { + print(Request, DILineInfo()); + } else { + for (const DILineInfo &L : Locations) + print(L, false); + printFooter(); + } +} + bool PlainPrinterBase::printError(const Request &Request, const ErrorInfoBase &ErrorInfo) { ErrHandler(ErrorInfo, Request.ModuleName); @@ -273,6 +284,8 @@ static std::string toHex(uint64_t V) { static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") { json::Object Json({{"ModuleName", Request.ModuleName.str()}}); + if (!Request.Symbol.empty()) + Json["SymName"] = Request.Symbol.str(); if (Request.Address) Json["Address"] = toHex(*Request.Address); if (!ErrorMsg.empty()) @@ -362,6 +375,19 @@ void JSONPrinter::print(const Request &Request, printJSON(std::move(Json)); } +void JSONPrinter::print(const Request &Request, + const std::vector<DILineInfo> &Locations) { + json::Array Definitions; + for (const DILineInfo &L : Locations) + Definitions.push_back(toJSON(L)); + json::Object Json = toJSON(Request); + Json["Loc"] = std::move(Definitions); + if (ObjectList) + ObjectList->push_back(std::move(Json)); + else + printJSON(std::move(Json)); +} + bool JSONPrinter::printError(const Request &Request, const ErrorInfoBase &ErrorInfo) { json::Object Json = toJSON(Request, ErrorInfo.message()); diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 6b8068a531c05..697303038507a 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -351,6 +351,19 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame( return DebugInfoContext->getLocalsForAddress(ModuleOffset); } +std::vector<object::SectionedAddress> 
+SymbolizableObjectFile::findSymbol(StringRef Symbol) const { + std::vector<object::SectionedAddress> Result; + for (const SymbolDesc &Sym : Symbols) { + if (Sym.Name.equals(Symbol)) { + object::SectionedAddress A{Sym.Addr, + getModuleSectionIndexForAddress(Sym.Addr)}; + Result.push_back(A); + } + } + return Result; +} + /// Search for the first occurence of specified Address in ObjectFile. uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress( uint64_t Address) const { diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 517f1e7dc284f..36d112a5f3fb2 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -231,6 +231,50 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, return symbolizeFrameCommon(BuildID, ModuleOffset); } +template <typename T> +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) { + auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); + if (!InfoOrErr) + return InfoOrErr.takeError(); + + SymbolizableModule *Info = *InfoOrErr; + std::vector<DILineInfo> Result; + + // A null module means an error has already been reported. Return an empty + // result. 
+ if (!Info) + return Result; + + for (object::SectionedAddress A : Info->findSymbol(Symbol)) { + DILineInfo LineInfo = Info->symbolizeCode( + A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), + Opts.UseSymbolTable); + if (LineInfo.FileName != DILineInfo::BadString) { + if (Opts.Demangle) + LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); + Result.push_back(LineInfo); + } + } + + return Result; +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) { + return findSymbolCommon(Obj, Symbol); +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) { + return findSymbolCommon(ModuleName.str(), Symbol); +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) { + return findSymbolCommon(BuildID, Symbol); +} + void LLVMSymbolizer::flush() { ObjectForUBPathAndArch.clear(); LRUBinaries.clear(); diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp index b5e146b114e25..b19992175bf99 100644 --- a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp +++ b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp @@ -1,3 +1,3 @@ -some text +something not a valid address 0x40054d -some text2 +some text possibly a symbol diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp index a5cfcb2558f35..2c4d722e32862 100644 --- a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp +++ b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp @@ -5,4 +5,4 @@ some text 0x4005b9 0x4005ce 0x4005d4 -some more text +another text diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h new file mode 100644 index 0000000000000..b097c4d9dc00a --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h @@ -0,0 +1,19 @@ +// This file is a part of sources used to build `symbols.so`, which is used to +// test symbol location search 
made by llvm-symbolizer. +// +// Build instructions: +// $ mkdir /tmp/dbginfo +// $ cp symbols.h symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c /tmp/dbginfo/ +// $ cd /tmp/dbginfo +// $ gcc -osymbols.so -shared -fPIC -g symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c + + +extern "C" { +extern int global_01; +int func_01(); +int func_02(int); +} + +template <typename T> T func_03(T x) { + return x + T(1); +} diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp new file mode 100644 index 0000000000000..ad4b3e34411aa --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp @@ -0,0 +1,25 @@ +#include "symbols.h" + +int global_01 = 22; + +int static static_var = 0; + +static int static_func_01(int x) { + static_var = x; + return global_01; +} + +int func_01() { + int res = 1; + return res + static_func_01(22); +} + +int func_04() { + static_var = 0; + return 22; +} + +int func_04(int x) { + int res = static_var; + return res + func_03(x); +} diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp new file mode 100644 index 0000000000000..35e66d62622f8 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp @@ -0,0 +1,18 @@ +#include "symbols.h" + +int static static_var = 4; + +static int static_func_01(int x) { + static_var--; + return x; +} + +int func_02(int x) { + static_var = x; + return static_func_01(x); +} + +int func_05(int x) { + int res = static_var; + return res + func_03(x); +} diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c new file mode 100644 index 0000000000000..1284be505b6ba --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c @@ -0,0 +1,12 @@ +static int static_func(int); +static int static_var = 0; + +int static_func(int x) { + 
static_var++; + return static_var + x; +} + +int func_06(int x) { + return static_func(x); +} + diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c new file mode 100644 index 0000000000000..de2ac81d2a78c --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c @@ -0,0 +1,13 @@ +static int static_func(int); +static int static_var = 5; + +int static_func(int x) { + static_var++; + return static_var + x; +} + +int func_07(int x) { + static_var++; + return static_func(x); +} + diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so new file mode 100755 index 0000000000000..ceacd9845a8d8 Binary files /dev/null and b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so differ diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test index 9179b673f39fd..0e0e61c0bf119 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test @@ -25,39 +25,44 @@ # RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=NO-INLINES --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# NO-INLINES:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# NO-INLINES:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. 
# NO-INLINES-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2}]} ## Invalid argument after a valid one. -# NO-INLINES-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# NO-INLINES-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} ## This test case is testing stdin input, inlines by default. # RUN: llvm-symbolizer --output-style=JSON -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=INLINE --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# INLINE:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# INLINE:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. # INLINE-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. 
-# INLINE-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# INLINE-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} ## Also check the last test case with llvm-adr2line. ## The expected result is the same with -f -i. # RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# INLINE-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. # INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. -# INLINE-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# INLINE-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} ## Note llvm-addr2line without -f does not print the function name in JSON too. 
# RUN: llvm-addr2line --output-style=JSON -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=NO-FUNC-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# NO-FUNC-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# NO-FUNC-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. # NO-FUNC-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. -# NO-FUNC-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]} +# NO-FUNC-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} + +## When a module offset is specified by a symbol, more than one source location can be found. 
+# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/symbols.so "static_func" | \ +# RUN: FileCheck %s --check-prefix=MULTIPLE --strict-whitespace --match-full-lines --implicit-check-not={{.}} +# MULTIPLE:[{"Loc":[{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","FunctionName":"static_func","Line":4,"StartAddress":"0x121d","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","StartLine":4},{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","FunctionName":"static_func","Line":4,"StartAddress":"0x125f","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","StartLine":4}],"ModuleName":"{{.*}}Inputs/symbols.so","SymName":"static_func"}] diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test index 722ac73d75104..b91555937086e 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test @@ -9,8 +9,8 @@ ## Handle invalid argument. 
# RUN: llvm-symbolizer "DATA tmp.o Z" --output-style=JSON | \ -# RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}} -# INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}] +# RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}} +# INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}] # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll index 8a99345420fde..f82d6704ef8f9 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll +++ b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll @@ -9,8 +9,8 @@ ;; Handle invalid argument. ; RUN: llvm-symbolizer "FRAME tmp.o Z" --output-style=JSON | \ -; RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}} -; INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}] +; RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}} +; INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}] ; RUN: llc -filetype=obj -o %t.o %s diff --git a/llvm/test/tools/llvm-symbolizer/symbol-search.test b/llvm/test/tools/llvm-symbolizer/symbol-search.test new file mode 100644 index 0000000000000..634229c2e74c0 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/symbol-search.test @@ -0,0 +1,65 @@ +# This test checks the case when an address is specified by a symbol name rather +# than a number. 
+# +# It uses ELF shared object `Inputs/symbols.so` built for x86_64 using +# the instructions from `Inputs/symbols.h`. + +# Show that the "CODE" command supports search by symbol name. +RUN: llvm-addr2line --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s +CODE-CMD: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 + +# Check if a symbol name can be specified on the command-line. +RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s +RUN: llvm-symbolizer -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s +SYMB: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 + +# Check that if a symbol has a space in its name, ignore everything from the space onwards. +RUN: llvm-addr2line -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s +RUN: llvm-symbolizer -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s + +# Show that a symbol name may be resolved to more than one location. +RUN: llvm-addr2line -e %p/Inputs/symbols.so static_func | FileCheck --check-prefix=SYMB-MULTI %s +SYMB-MULTI: /tmp/dbginfo{{[/\]+}}symbols.part3.c:4 +SYMB-MULTI-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part4.c:4 + +# Show that if a symbol is not found, a special mark is printed. +RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s +NONEXISTENT: ?? + +# Show that more than one symbol may be specified. 
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s +FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 +FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10 + +# Show that C++ mangled names may be specified. +RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s +MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:7 +MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:5 + +# Show that containing function name can be printed in mangled form. +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so --no-demangle _Z7func_04i | FileCheck --check-prefix=MANGLED %s +RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f _Z7func_04i | FileCheck --check-prefix=MANGLED %s +MANGLED: _Z7func_04i +MANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22 + +# Show that containing function name can be printed in demangled form. +RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s +RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f --demangle _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s +NOTMANGLED: func_04(int) +NOTMANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22 + +# Show that both the symbol and input file can be specified in the search string on the command-line. +RUN: llvm-addr2line "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s +RUN: llvm-symbolizer "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s +SYMBIN: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 + +# Show that the case of missing input file specified in the search string on the command-line is properly treated. 
+RUN: llvm-addr2line "%p/Inputs/666.so func_01" 2> %t.1.stderr | FileCheck --check-prefix=NONEXISTENT %s +RUN: FileCheck --input-file=%t.1.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s +RUN: llvm-symbolizer "%p/Inputs/666.so func_01" 2> %t.2.stderr | FileCheck --check-prefix=NONEXISTENT %s +RUN: FileCheck --input-file=%t.2.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s +BINARY-NOT-FOUND: error: '{{.*}}666.so': [[MSG]] diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 78a0e6772f3fb..447c18abadc17 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -159,7 +159,7 @@ static Error makeStringError(StringRef Msg) { static Error parseCommand(StringRef BinaryName, bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, object::BuildID &BuildID, - uint64_t &ModuleOffset) { + StringRef &Symbol, uint64_t &ModuleOffset) { ModuleName = BinaryName; if (InputString.consume_front("CODE ")) { Cmd = Command::Code; @@ -224,35 +224,41 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line, return makeStringError("no input filename has been specified"); } - // Parse module offset. + // Parse module offset, which can be specified as a number or as a symbol. InputString = InputString.ltrim(); if (InputString.empty()) return makeStringError("no module offset has been specified"); + + // If input string contains a space, ignore everything after it. This behavior + // is consistent with GNU addr2line. int OffsetLength = InputString.find_first_of(" \n\r"); StringRef Offset = InputString.substr(0, OffsetLength); + // GNU addr2line assumes the offset is hexadecimal and allows a redundant // "0x" or "0X" prefix; do the same for compatibility. 
if (IsAddr2Line) Offset.consume_front("0x") || Offset.consume_front("0X"); - // If the input is not a valid module offset, it is not an error, but its - // lookup does not make sense. Return error of different kind to distinguish - // from error or success. - if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) - return errorCodeToError(errc::invalid_argument); + // If the input is not a number, treat it as a symbol. + if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) { + Symbol = Offset; + ModuleOffset = 0; + } return Error::success(); } template <typename T> void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, - uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, - OutputStyle Style, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { + StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA, + bool ShouldInline, OutputStyle Style, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; object::SectionedAddress Address = {AdjustedOffset, object::SectionedAddress::UndefSection}; - Request SymRequest = {ModuleName, Offset}; + Request SymRequest = { + ModuleName, Symbol.empty() ? 
std::make_optional(Offset) : std::nullopt, + Symbol}; if (Cmd == Command::Data) { Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print(SymRequest, ResOrErr, Printer); @@ -260,6 +266,10 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(ModuleSpec, Address); print(SymRequest, ResOrErr, Printer); + } else if (!Symbol.empty()) { + Expected<std::vector<DILineInfo>> ResOrErr = + Symbolizer.findSymbol(ModuleSpec, Symbol); + print(SymRequest, ResOrErr, Printer); } else if (ShouldInline) { Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); @@ -288,7 +298,7 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, } static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) { - Request SymRequest = {ModuleName, std::nullopt}; + Request SymRequest = {ModuleName, std::nullopt, StringRef()}; Printer.print(SymRequest, DILineInfo()); } @@ -301,16 +311,14 @@ static void symbolizeInput(const opt::InputArgList &Args, std::string ModuleName; object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); uint64_t Offset = 0; + StringRef Symbol; if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, StringRef(InputString), Cmd, ModuleName, BuildID, - Offset)) { - handleAllErrors( - std::move(E), - [&](const StringError &EI) { - printError(EI, InputString); - printUnknownLineInfo(ModuleName, Printer); - }, - [&](const ECError &EI) { printUnknownLineInfo(ModuleName, Printer); }); + Symbol, Offset)) { + handleAllErrors(std::move(E), [&](const StringError &EI) { + printError(EI, InputString); + printUnknownLineInfo(ModuleName, Printer); + }); return; } bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); @@ -319,11 +327,11 @@ static void symbolizeInput(const opt::InputArgList &Args, if (!Args.hasArg(OPT_no_debuginfod)) enableDebuginfod(Symbolizer, Args); std::string BuildIDStr = toHex(BuildID); - 
executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, - Style, Symbolizer, Printer); + executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA, + ShouldInline, Style, Symbolizer, Printer); } else { - executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, - Style, Symbolizer, Printer); + executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA, + ShouldInline, Style, Symbolizer, Printer); } } @@ -527,7 +535,7 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) { if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) { auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue()); if (!Status) { - Request SymRequest = {Arg->getValue(), 0}; + Request SymRequest = {Arg->getValue(), 0, StringRef()}; handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) { Printer->printError(SymRequest, EI); }); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 5984be98d798a..682f79a540cdc 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -20,6 +20,7 @@ using ::llvm::DIInliningInfo; using ::llvm::DILineInfo; using ::llvm::DILineInfoSpecifier; using ::llvm::DILocal; +using ::llvm::StringRef; using ::llvm::memprof::CallStackMap; using ::llvm::memprof::Frame; using ::llvm::memprof::FrameId; @@ -53,6 +54,9 @@ class MockSymbolizer : public SymbolizableModule { virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const { llvm_unreachable("unused"); } + virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol) const { + llvm_unreachable("unused"); + } virtual bool isWin32Module() const { llvm_unreachable("unused"); } virtual uint64_t getModulePreferredBase() const { llvm_unreachable("unused");